From 95b395963fed02cca8849137b375528a5fc94e35 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 8 Oct 2014 16:11:27 +0200 Subject: [PATCH 01/70] arm64/efi: efistub: jump to 'stext' directly, not through the header After the EFI stub has done its business, it jumps into the kernel by branching to offset #0 of the loaded Image, which is where it expects to find the header containing a 'branch to stext' instruction. However, the UEFI spec 2.1.1 states the following regarding PE/COFF image loading: "A UEFI image is loaded into memory through the LoadImage() Boot Service. This service loads an image with a PE32+ format into memory. This PE32+ loader is required to load all sections of the PE32+ image into memory." In other words, it is /not/ required to load parts of the image that are not covered by a PE/COFF section, so it may not have loaded the header at the expected offset, as it is not covered by any PE/COFF section. So instead, jump to 'stext' directly, which is at the base of the PE/COFF .text section, by supplying a symbol 'stext_offset' to efi-entry.o which contains the relative offset of stext into the Image. Also replace other open coded calculations of the same value with a reference to 'stext_offset' Acked-by: Mark Rutland Acked-by: Roy Franz Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi-entry.S | 3 ++- arch/arm64/kernel/head.S | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 619b1dd7bcde..a0016d3a17da 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -61,7 +61,8 @@ ENTRY(efi_stub_entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - mov x21, x0 + ldr x21, =stext_offset + add x21, x0, x21 /* * Flush dcache covering current runtime addresses diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0a6e4f924df8..8c06c9d269d2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -132,6 +132,8 @@ efi_head: #endif #ifdef CONFIG_EFI + .globl stext_offset + .set stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -155,7 +157,7 @@ optional_header: .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext - efi_head // BaseOfCode + .long stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -172,7 +174,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext - efi_head // SizeOfHeaders + .long stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -217,9 +219,9 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext - efi_head // VirtualAddress + .long stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext - efi_head // PointerToRawData + .long stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) From ea6bc80d1819f307d98c6562c8ebb2c6c1297d47 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Oct 2014 11:25:24 +0200 Subject: [PATCH 02/70] arm64/efi: set PE/COFF section alignment to 4 KB Position independent AArch64 code needs to be linked and loaded at the same relative offset from a 4 KB boundary, or adrp/add and adrp/ldr pairs will not 
work correctly. (This is how PC relative symbol references with a 4 GB reach are emitted) We need to declare this in the PE/COFF header, otherwise the PE/COFF loader may load the Image and invoke the stub at an offset which violates this rule. Reviewed-by: Roy Franz Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/head.S | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8c06c9d269d2..8ae84d8c2a8c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -161,7 +161,7 @@ optional_header: extra_header_fields: .quad 0 // ImageBase - .long 0x20 // SectionAlignment + .long 0x1000 // SectionAlignment .long 0x8 // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion @@ -228,7 +228,15 @@ section_table: .short 0 // NumberOfRelocations (0 for executables) .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) - .align 5 + + /* + * EFI will load stext onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that stext is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 #endif ENTRY(stext) From a352ea3e197b3aa74deb51728b050cd4a0c6105a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Oct 2014 18:42:55 +0200 Subject: [PATCH 03/70] arm64/efi: set PE/COFF file alignment to 512 bytes Change our PE/COFF header to use the minimum file alignment of 512 bytes (0x200), as mandated by the PE/COFF spec v8.3 Also update the linker script so that the Image file itself is also a round multiple of FileAlignment. Acked-by: Catalin Marinas Acked-by: Roy Franz Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/vmlinux.lds.S | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8ae84d8c2a8c..5a76e3ab788c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -162,7 +162,7 @@ optional_header: extra_header_fields: .quad 0 // ImageBase .long 0x1000 // SectionAlignment - .long 0x8 // FileAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion .short 0 // MajorImageVersion diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index edf8715ba39b..4596f46d0244 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -32,6 +32,22 @@ jiffies = jiffies_64; *(.hyp.text) \ VMLINUX_SYMBOL(__hyp_text_end) = .; +/* + * The size of the PE/COFF section that covers the kernel image, which + * runs from stext to _edata, must be a round multiple of the PE/COFF + * FileAlignment, which we set to its minimum value of 0x200. 'stext' + * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned + * boundary should be sufficient. + */ +PECOFF_FILE_ALIGNMENT = 0x200; + +#ifdef CONFIG_EFI +#define PECOFF_EDATA_PADDING \ + .pecoff_edata_padding : { BYTE(0); . 
= ALIGN(PECOFF_FILE_ALIGNMENT); } +#else +#define PECOFF_EDATA_PADDING +#endif + SECTIONS { /* @@ -103,6 +119,7 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + PECOFF_EDATA_PADDING _edata = .; BSS_SECTION(0, 0, 0) From 61139eb04056bba69aeef6c481802c4ea028bf4d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 15:31:57 +0200 Subject: [PATCH 04/70] arm64/efi: invert UEFI memory region reservation logic Instead of reserving the memory regions based on which types we know need to be reserved, consider only regions of the following types as free for general use by the OS: EFI_LOADER_CODE EFI_LOADER_DATA EFI_BOOT_SERVICES_CODE EFI_BOOT_SERVICES_DATA EFI_CONVENTIONAL_MEMORY Note that this also fixes a problem with the original code, which would misidentify a EFI_RUNTIME_SERVICES_DATA region as not reserved if it does not have the EFI_MEMORY_RUNTIME attribute set. However, it is perfectly legal for the firmware not to request a virtual mapping for EFI_RUNTIME_SERVICES_DATA regions that contain configuration tables, in which case the EFI_MEMORY_RUNTIME attribute would not be set. Acked-by: Roy Franz Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 95c49ebc660d..2e829148fb36 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -125,17 +125,17 @@ out: */ static __init int is_reserve_region(efi_memory_desc_t *md) { - if (!is_normal_ram(md)) + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: return 0; - - if (md->attribute & EFI_MEMORY_RUNTIME) - return 1; - - if (md->type == EFI_ACPI_RECLAIM_MEMORY || - md->type == EFI_RESERVED_TYPE) - return 1; - - return 0; + default: + break; + } + return is_normal_ram(md); } static __init void reserve_regions(void) From 4e27d4754e8990da264c1e01e2f6bd8340e30cb3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 17 Oct 2014 12:44:11 +0200 Subject: [PATCH 05/70] arm64/efi: drop redundant set_bit(EFI_CONFIG_TABLES) The EFI_CONFIG_TABLES bit already gets set by efi_config_init(), so there is no reason to set it again after this function returns successfully. Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 2e829148fb36..558572ef1ea3 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -112,8 +112,6 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff, vendor); retval = efi_config_init(NULL); - if (retval == 0) - set_bit(EFI_CONFIG_TABLES, &efi.flags); out: early_memunmap(efi.systab, sizeof(efi_system_table_t)); From e1ccbbc9d5aa01a6c1c9c78acea6515db4f1be71 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 14 Oct 2014 16:34:47 +0200 Subject: [PATCH 06/70] efi: dmi: add support for SMBIOS 3.0 UEFI configuration table This adds support to the UEFI side for detecting the presence of a SMBIOS 3.0 64-bit entry point. This allows the actual SMBIOS structure table to reside at a physical offset over 4 GB, which cannot be supported by the legacy SMBIOS 32-bit entry point. Since the firmware can legally provide both entry points, store the SMBIOS 3.0 entry point in a separate variable, and let the DMI decoding layer decide which one will be used. 
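As a usage illustration (the addresses below are invented placeholders, not values from the patch): on firmware that publishes both entry points, the new field shows up next to the existing one in /sys/firmware/efi/systab, e.g.:

	$ cat /sys/firmware/efi/systab
	ACPI20=0x87fe0000
	SMBIOS=0x87fb6000
	SMBIOS3=0x87fb4000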
Tested-by: Suravee Suthikulpanit Acked-by: Leif Lindholm Acked-by: Matt Fleming Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 4 ++++ drivers/xen/efi.c | 1 + include/linux/efi.h | 6 +++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8590099ac148..9035c1b74d58 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -30,6 +30,7 @@ struct efi __read_mostly efi = { .acpi = EFI_INVALID_TABLE_ADDR, .acpi20 = EFI_INVALID_TABLE_ADDR, .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, .sal_systab = EFI_INVALID_TABLE_ADDR, .boot_info = EFI_INVALID_TABLE_ADDR, .hcdp = EFI_INVALID_TABLE_ADDR, @@ -86,6 +87,8 @@ static ssize_t systab_show(struct kobject *kobj, str += sprintf(str, "ACPI=0x%lx\n", efi.acpi); if (efi.smbios != EFI_INVALID_TABLE_ADDR) str += sprintf(str, "SMBIOS=0x%lx\n", efi.smbios); + if (efi.smbios3 != EFI_INVALID_TABLE_ADDR) + str += sprintf(str, "SMBIOS3=0x%lx\n", efi.smbios3); if (efi.hcdp != EFI_INVALID_TABLE_ADDR) str += sprintf(str, "HCDP=0x%lx\n", efi.hcdp); if (efi.boot_info != EFI_INVALID_TABLE_ADDR) @@ -260,6 +263,7 @@ static __initdata efi_config_table_type_t common_tables[] = { {MPS_TABLE_GUID, "MPS", &efi.mps}, {SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab}, {SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios}, + {SMBIOS3_TABLE_GUID, "SMBIOS 3.0", &efi.smbios3}, {UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga}, {NULL_GUID, NULL, NULL}, }; diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index 1f850c97482f..f745db270171 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -294,6 +294,7 @@ static const struct efi efi_xen __initconst = { .acpi = EFI_INVALID_TABLE_ADDR, .acpi20 = EFI_INVALID_TABLE_ADDR, .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, .sal_systab = EFI_INVALID_TABLE_ADDR, .boot_info = EFI_INVALID_TABLE_ADDR, .hcdp = EFI_INVALID_TABLE_ADDR, diff --git a/include/linux/efi.h b/include/linux/efi.h index 0949f9c7e872..0238d612750e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -547,6 +547,9 @@ void efi_native_runtime_setup(void); #define SMBIOS_TABLE_GUID \ EFI_GUID( 0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) +#define SMBIOS3_TABLE_GUID \ + EFI_GUID( 0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94 ) + #define SAL_SYSTEM_TABLE_GUID \ EFI_GUID( 0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d ) @@ -810,7 +813,8 @@ extern struct efi { unsigned long mps; /* MPS table */ unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ unsigned long acpi20; /* ACPI table (ACPI 2.0) */ - unsigned long smbios; /* SM BIOS table */ + unsigned long smbios; /* SMBIOS table (32 bit entry point) */ + unsigned long smbios3; /* SMBIOS table (64 bit entry point) */ unsigned long sal_systab; /* SAL system table */ unsigned long boot_info; /* boot info table */ unsigned long hcdp; /* HCDP table */ From fc43026278b23b3515cf8f909ec29df94b3ae1a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 14 Oct 2014 16:41:27 +0200 Subject: [PATCH 07/70] dmi: add support for SMBIOS 3.0 64-bit entry point The DMTF SMBIOS reference spec v3.0.0 defines a new 64-bit entry point, which enables support for SMBIOS structure tables residing at a physical offset over 4 GB. This is especially important for upcoming arm64 platforms whose system RAM resides entirely above the 4 GB boundary. 
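For reference, the 64-bit entry point has the layout sketched below. This is a hedged reconstruction from the field offsets used by the parsing code added below in dmi_smbios3_present(); the struct and field names are illustrative and appear in neither the patch nor the spec text quoted here:

	struct smbios3_entry_point {
		u8  anchor[5];		/* 0x00: "_SM3_" signature */
		u8  checksum;		/* 0x05: covers 'length' bytes */
		u8  length;		/* 0x06: entry point length, < 32 */
		u8  major, minor;	/* 0x07: SMBIOS version, read as big-endian u16 */
		u8  docrev;		/* 0x09 */
		u8  revision;		/* 0x0a: entry point revision */
		u8  reserved;		/* 0x0b */
		u32 table_max_size;	/* 0x0c: structure table maximum size, little-endian */
		u64 table_address;	/* 0x10: structure table address, little-endian, may be above 4 GB */
	} __packed;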
For the UEFI case, this code attempts to detect the new SMBIOS 3.0 header magic at the offset passed in the SMBIOS3_TABLE_GUID UEFI configuration table. If this configuration table is not provided, or if we fail to parse the header, we fall back to using the legacy SMBIOS_TABLE_GUID configuration table. This is in line with the spec, that allows both configuration tables to be provided, but mandates that they must point to the same structure table, unless the version pointed to by the 64-bit entry point is a superset of the 32-bit one. For the non-UEFI case, the detection logic is modified to look for the SMBIOS 3.0 header magic before it looks for the legacy header magic. Note that this patch is based on version 3.0.0d [draft] of the specification, which is expected not to deviate from the final version in ways that would affect the correctness of this implementation. Tested-by: Suravee Suthikulpanit Acked-by: Leif Lindholm Tested-by: Leif Lindholm Cc: Andrew Morton Cc: Tony Luck Acked-by: Matt Fleming Signed-off-by: Ard Biesheuvel --- drivers/firmware/dmi_scan.c | 79 +++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 17afc51f3054..c5f7b4e9eb6c 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -92,6 +92,12 @@ static void dmi_table(u8 *buf, int len, int num, while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) { const struct dmi_header *dm = (const struct dmi_header *)data; + /* + * 7.45 End-of-Table (Type 127) [SMBIOS reference spec v3.0.0] + */ + if (dm->type == DMI_ENTRY_END_OF_TABLE) + break; + /* * We want to know the total length (formatted area and * strings) before decoding to make sure we won't run off the @@ -107,7 +113,7 @@ static void dmi_table(u8 *buf, int len, int num, } } -static u32 dmi_base; +static phys_addr_t dmi_base; static u16 dmi_len; static u16 dmi_num; @@ -467,7 +473,7 @@ static int __init dmi_present(const u8 *buf) if (memcmp(buf, "_SM_", 4) == 0 && buf[5] < 32 && dmi_checksum(buf, buf[5])) { - smbios_ver = (buf[6] << 8) + buf[7]; + smbios_ver = get_unaligned_be16(buf + 6); /* Some BIOS report weird SMBIOS version, fix that up */ switch (smbios_ver) { @@ -489,10 +495,9 @@ static int __init dmi_present(const u8 *buf) buf += 16; if (memcmp(buf, "_DMI_", 5) == 0 && dmi_checksum(buf, 15)) { - dmi_num = (buf[13] << 8) | buf[12]; - dmi_len = (buf[7] << 8) | buf[6]; - dmi_base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; + dmi_num = get_unaligned_le16(buf + 12); + dmi_len = get_unaligned_le16(buf + 6); + dmi_base = get_unaligned_le32(buf + 8); if (dmi_walk_early(dmi_decode) == 0) { if (smbios_ver) { @@ -514,12 +519,72 @@ static int __init dmi_present(const u8 *buf) return 1; } +/* + * Check for the SMBIOS 3.0 64-bit entry point signature. Unlike the legacy + * 32-bit entry point, there is no embedded DMI header (_DMI_) in here. + */ +static int __init dmi_smbios3_present(const u8 *buf) +{ + if (memcmp(buf, "_SM3_", 5) == 0 && + buf[6] < 32 && dmi_checksum(buf, buf[6])) { + dmi_ver = get_unaligned_be16(buf + 7); + dmi_len = get_unaligned_le32(buf + 12); + dmi_base = get_unaligned_le64(buf + 16); + + /* + * The 64-bit SMBIOS 3.0 entry point no longer has a field + * containing the number of structures present in the table. + * Instead, it defines the table size as a maximum size, and + * relies on the end-of-table structure type (#127) to be used + * to signal the end of the table. 
+ * So let's define dmi_num as an upper bound as well: each + * structure has a 4 byte header, so dmi_len / 4 is an upper + * bound for the number of structures in the table. + */ + dmi_num = dmi_len / 4; + + if (dmi_walk_early(dmi_decode) == 0) { + pr_info("SMBIOS %d.%d present.\n", + dmi_ver >> 8, dmi_ver & 0xFF); + dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string)); + pr_debug("DMI: %s\n", dmi_ids_string); + return 0; + } + } + return 1; +} + void __init dmi_scan_machine(void) { char __iomem *p, *q; char buf[32]; if (efi_enabled(EFI_CONFIG_TABLES)) { + /* + * According to the DMTF SMBIOS reference spec v3.0.0, it is + * allowed to define both the 64-bit entry point (smbios3) and + * the 32-bit entry point (smbios), in which case they should + * either both point to the same SMBIOS structure table, or the + * table pointed to by the 64-bit entry point should contain a + * superset of the table contents pointed to by the 32-bit entry + * point (section 5.2) + * This implies that the 64-bit entry point should have + * precedence if it is defined and supported by the OS. If we + * have the 64-bit entry point, but fail to decode it, fall + * back to the legacy one (if available) + */ + if (efi.smbios3 != EFI_INVALID_TABLE_ADDR) { + p = dmi_early_remap(efi.smbios3, 32); + if (p == NULL) + goto error; + memcpy_fromio(buf, p, 32); + dmi_early_unmap(p, 32); + + if (!dmi_smbios3_present(buf)) { + dmi_available = 1; + goto out; + } + } if (efi.smbios == EFI_INVALID_TABLE_ADDR) goto error; @@ -552,7 +617,7 @@ void __init dmi_scan_machine(void) memset(buf, 0, 16); for (q = p; q < p + 0x10000; q += 16) { memcpy_fromio(buf + 16, q, 16); - if (!dmi_present(buf)) { + if (!dmi_smbios3_present(buf) || !dmi_present(buf)) { dmi_available = 1; dmi_early_unmap(p, 0x10000); goto out; From d1ae8c0057921681ca489bba7efbfacbb60d0f28 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Sat, 4 Oct 2014 23:46:43 +0800 Subject: [PATCH 08/70] arm64: dmi: Add SMBIOS/DMI support SMBIOS is important for server hardware vendors. It implements a spec for providing descriptive information about the platform. Things like serial numbers, physical layout of the ports, build configuration data, and the like. Signed-off-by: Yi Li Tested-by: Suravee Suthikulpanit Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/Kconfig | 11 +++++++++++ arch/arm64/include/asm/dmi.h | 31 +++++++++++++++++++++++++++++++ arch/arm64/kernel/efi.c | 13 +++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 arch/arm64/include/asm/dmi.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9532f8d5857e..2c3c2ca6f8bc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -401,6 +401,17 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h new file mode 100644 index 000000000000..69d37d87b159 --- /dev/null +++ b/arch/arm64/include/asm/dmi.h @@ -0,0 +1,31 @@ +/* + * arch/arm64/include/asm/dmi.h + * + * Copyright (C) 2013 Linaro Limited. 
+ * Written by: Yi Li (yi.li@linaro.org) + * + * based on arch/ia64/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include +#include + +/* + * According to section 2.3.6 of the UEFI spec, the firmware should not + * request a virtual mapping for configuration tables such as SMBIOS. + * This means we have to map them before use. + */ +#define dmi_early_remap(x, l) ioremap_cache(x, l) +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap(x, l) ioremap_cache(x, l) +#define dmi_unmap(x) iounmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 558572ef1ea3..9ae5e7918b8f 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,6 +11,7 @@ * */ +#include #include #include #include @@ -469,3 +470,15 @@ err_unmap: return -1; } early_initcall(arm64_enter_virtual_mode); + +static int __init arm64_dmi_init(void) +{ + /* + * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to + * be called early because dmi_id_init(), which is an arch_initcall + * itself, depends on dmi_scan_machine() having been called already. + */ + dmi_scan_machine(); + return 0; +} +core_initcall(arm64_dmi_init); From b07bfaa3c126e4e36a2b59350b3b930aa1b121ac Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Oct 2014 09:36:50 +0200 Subject: [PATCH 09/70] arm64: dmi: set DMI string as dump stack arch description This sets the DMI string, containing system type, serial number, firmware version etc. as dump stack arch description, so that oopses and other kernel stack dumps automatically have this information included, if available. Tested-by: Leif Lindholm Acked-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 9ae5e7918b8f..6fac253bc783 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -479,6 +479,8 @@ static int __init arm64_dmi_init(void) * itself, depends on dmi_scan_machine() having been called already. */ dmi_scan_machine(); + if (dmi_available) + dmi_set_dump_stack_arch_desc(); return 0; } core_initcall(arm64_dmi_init); From 0bcaa9040d058684d58c36ef273b8946996c7078 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Oct 2014 16:33:33 +0100 Subject: [PATCH 10/70] efi: efi-stub: notify on DTB absence In the absence of a DTB configuration table, the EFI stub will happily continue attempting to boot a kernel, despite the fact that this kernel may not function without a description of the hardware. In this case, as with a typo'd "dtb=" option (e.g. "dbt=") or many other possible failures, the only output seen by the user will be the rather terse output from the EFI stub: EFI stub: Booting Linux Kernel... To aid those attempting to debug such failures, this patch adds a notice when no DTB is found, making the output more helpful: EFI stub: Booting Linux Kernel... EFI stub: Generating empty DTB Additionally, a positive acknowledgement is added when a user-specified DTB is in use: EFI stub: Booting Linux Kernel... EFI stub: Using DTB from command line Similarly, a positive acknowledgement is added when a DTB from a configuration table is in use: EFI stub: Booting Linux Kernel... 
EFI stub: Using DTB from configuration table Signed-off-by: Mark Rutland Acked-by: Leif Lindholm Acked-by: Ard Biesheuvel Acked-by: Roy Franz Acked-by: Matt Fleming Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm-stub.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 75ee05964cbc..eb48a1a1a576 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -247,9 +247,18 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, goto fail_free_cmdline; } } - if (!fdt_addr) + + if (fdt_addr) { + pr_efi(sys_table, "Using DTB from command line\n"); + } else { /* Look for a device tree configuration table entry. */ fdt_addr = (uintptr_t)get_fdt(sys_table); + if (fdt_addr) + pr_efi(sys_table, "Using DTB from configuration table\n"); + } + + if (!fdt_addr) + pr_efi(sys_table, "Generating empty DTB\n"); status = handle_cmdline_files(sys_table, image, cmdline_ptr, "initrd=", dram_base + SZ_512M, From 4ee20980812b5b1800221996ec438689f61e98b4 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Thu, 9 Oct 2014 16:53:10 +0100 Subject: [PATCH 11/70] arm64: fix data type for physical address Use phys_addr_t for physical address in alloc_init_pud. Although phys_addr_t and unsigned long are 64 bit in arm64, it is better to use phys_addr_t to describe physical addresses. Signed-off-by: Min-Hua Chen Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0bf90d26e745..f4f8b500f74c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -202,7 +202,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, + unsigned long end, phys_addr_t phys, int map_io) { pud_t *pud; From 70ddb63a88bfd45eb6abe36e2bf4f8f351a447d7 Mon Sep 17 00:00:00 2001 From: Joonwoo Park Date: Tue, 21 Oct 2014 01:59:03 +0100 Subject: [PATCH 12/70] arm64: optimize memcpy_{from,to}io() and memset_io() Optimize memcpy_{from,to}io() and memset_io() by transferring in 64 bit as much as possible with minimized barrier usage. This simplest optimization brings faster throughput compare to current byte-by-byte read and write with barrier in the loop. Code's skeleton is taken from the powerpc. 
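One detail worth calling out: __memset_io() below widens the fill byte into a 64-bit pattern by repeated shift-and-or before entering the 8-byte loop. A standalone sketch of just that step (plain C, not part of the patch):

	#include <stdint.h>

	/* Replicate the low byte of 'c' across all eight bytes of a 64-bit word. */
	static uint64_t replicate_byte(int c)
	{
		uint64_t qc = (uint8_t)c;	/* c = 0xab -> 0x00000000000000ab */

		qc |= qc << 8;			/* 0x000000000000abab */
		qc |= qc << 16;			/* 0x00000000abababab */
		qc |= qc << 32;			/* 0xabababababababab */
		return qc;
	}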
Link: http://lkml.kernel.org/p/20141020133304.GH23751@e104818-lin.cambridge.arm.com Reviewed-by: Catalin Marinas Reviewed-by: Trilok Soni Signed-off-by: Joonwoo Park Signed-off-by: Will Deacon --- arch/arm64/kernel/io.c | 72 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index 7d37ead4d199..354be2a872ae 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c @@ -25,12 +25,26 @@ */ void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - unsigned char *t = to; - while (count) { - count--; - *t = readb(from); - t++; + while (count && (!IS_ALIGNED((unsigned long)from, 8) || + !IS_ALIGNED((unsigned long)to, 8))) { + *(u8 *)to = __raw_readb(from); from++; + to++; + count--; + } + + while (count >= 8) { + *(u64 *)to = __raw_readq(from); + from += 8; + to += 8; + count -= 8; + } + + while (count) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; } } EXPORT_SYMBOL(__memcpy_fromio); @@ -40,12 +54,26 @@ EXPORT_SYMBOL(__memcpy_fromio); */ void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - const unsigned char *f = from; - while (count) { - count--; - writeb(*f, to); - f++; + while (count && (!IS_ALIGNED((unsigned long)to, 8) || + !IS_ALIGNED((unsigned long)from, 8))) { + __raw_writeb(*(volatile u8 *)from, to); + from++; to++; + count--; + } + + while (count >= 8) { + __raw_writeq(*(volatile u64 *)from, to); + from += 8; + to += 8; + count -= 8; + } + + while (count) { + __raw_writeb(*(volatile u8 *)from, to); + from++; + to++; + count--; } } EXPORT_SYMBOL(__memcpy_toio); @@ -55,10 +83,28 @@ EXPORT_SYMBOL(__memcpy_toio); */ void __memset_io(volatile void __iomem *dst, int c, size_t count) { - while (count) { - count--; - writeb(c, dst); + u64 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + qc |= qc << 32; + + while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); dst++; + count--; + } + + while (count >= 8) { + __raw_writeq(qc, dst); + dst += 8; + count -= 8; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; } } EXPORT_SYMBOL(__memset_io); From 5284e1b4bc8ae6fcc1c92c63cf6c876a53292f82 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 24 Oct 2014 13:22:20 +0100 Subject: [PATCH 13/70] arm64: xchg: Implement cmpxchg_double The arm64 architecture has the ability to exclusively load and store a pair of registers from an address (ldxp/stxp). Also the SLUB can take advantage of a cmpxchg_double implementation to avoid taking some locks. This patch provides an implementation of cmpxchg_double for 64-bit pairs, and activates the logic required for the SLUB to use these functions (HAVE_ALIGNED_STRUCT_PAGE and HAVE_CMPXCHG_DOUBLE). Also definitions of this_cpu_cmpxchg_8 and this_cpu_cmpxchg_double_8 are wired up to cmpxchg_local and cmpxchg_double_local (rather than the stock implementations that perform non-atomic operations with interrupts disabled) as they are used by the SLUB. 
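As a usage sketch (a hypothetical caller, not code from this patch): cmpxchg_double() operates on two adjacent 64-bit words, and ldxp/stxp require the pair to be 16-byte aligned, which is why SLUB needs HAVE_ALIGNED_STRUCT_PAGE. Per the macro added below, it returns nonzero if the swap succeeded:

	struct counters {
		u64 count;
		u64 epoch;
	} __aligned(16);		/* ldxp/stxp need a 16-byte aligned pair */

	static int bump(struct counters *c, u64 old_count, u64 old_epoch)
	{
		/* Store (count + 1, epoch + 1) only if the pair still
		 * reads (old_count, old_epoch), as one atomic operation. */
		return cmpxchg_double(&c->count, &c->epoch,
				      old_count, old_epoch,
				      old_count + 1, old_epoch + 1);
	}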
On a Juno platform running on only the A57s I get quite a noticeable performance improvement with 5 runs of hackbench on v3.17: Baseline | With Patch -----------------+----------- Mean 119.2312 | 106.1782 StdDev 0.4919 | 0.4494 (times taken to complete `./hackbench 100 process 1000', in seconds) Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 + arch/arm64/include/asm/cmpxchg.h | 71 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2c3c2ca6f8bc..1e0e4671dd25 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -34,6 +34,7 @@ config ARM64 select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND + select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB @@ -41,6 +42,7 @@ config ARM64 select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT select HAVE_CC_STACKPROTECTOR + select HAVE_CMPXCHG_DOUBLE select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index ddb9d7830558..89e397befad5 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -19,6 +19,7 @@ #define __ASM_CMPXCHG_H #include +#include #include @@ -152,6 +153,51 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return oldval; } +#define system_has_cmpxchg_double() 1 + +static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2, + unsigned long old1, unsigned long old2, + unsigned long new1, unsigned long new2, int size) +{ + unsigned long loop, lost; + + switch (size) { + case 8: + VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1); + do { + asm volatile("// __cmpxchg_double8\n" + " ldxp %0, %1, %2\n" + " eor %0, %0, %3\n" + " eor %1, %1, %4\n" + " orr %1, %0, %1\n" + " mov %w0, #0\n" + " cbnz %1, 1f\n" + " stxp %w0, %5, %6, %2\n" + "1:\n" + : "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1) + : "r" (old1), "r"(old2), "r"(new1), "r"(new2)); + } while (loop); + break; + default: + BUILD_BUG(); + } + + return !lost; +} + +static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2, + unsigned long old1, unsigned long old2, + unsigned long new1, unsigned long new2, int size) +{ + int ret; + + smp_mb(); + ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size); + smp_mb(); + + return ret; +} + static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, unsigned long new, int size) { @@ -182,6 +228,31 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) +#define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ +({\ + int __ret;\ + __ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \ + (unsigned long)(o2), (unsigned long)(n1), \ + (unsigned long)(n2), sizeof(*(ptr1)));\ + __ret; \ +}) + +#define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ +({\ + int __ret;\ + __ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \ + (unsigned long)(o2), (unsigned long)(n1), \ + (unsigned long)(n2), sizeof(*(ptr1)));\ + __ret; \ +}) + +#define this_cpu_cmpxchg_8(ptr, o, n) \ + cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) + +#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ + cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ + o1, o2, n1, n2) + #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) 
cmpxchg_local((ptr),(o),(n)) From d8c6d8b877cb2a216e933ce11954632a9daeb362 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Oct 2014 12:24:20 +0000 Subject: [PATCH 14/70] arm64/dt: add machine name to kernel call stack dump output This installs the machine name as recorded by setup_machine_fdt() as dump stack arch description. This results in the string to be included in call stack dumps, as is shown here: ... Bad mode in Synchronous Abort handler detected, code 0x84000005 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.18.0-rc2+ #548 > Hardware name: linux,dummy-virt (DT) task: ffffffc07c870000 ti: ffffffc07c878000 task.ti: ffffffc07c878000 PC is at 0x0 ... Note that systems that support DMI/SMBIOS may override this later. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2437196cc5d4..cbeaa2f3844b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -312,6 +312,7 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) } machine_name = of_flat_dt_get_machine_name(); + dump_stack_set_arch_desc("%s (DT)", machine_name); } /* From 286fb1cc32b11c18da3573a8c8c37a4f9da16e30 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 31 Oct 2014 23:06:47 +0000 Subject: [PATCH 15/70] arm64/kvm: Fix assembler compatibility of macros Some of the macros defined in kvm_arm.h are useful in assembly files, but are not compatible with the assembler. Change any C language integer constant definitions using appended U, UL, or ULL to the UL() preprocessor macro. Also, add a preprocessor include of the asm/memory.h file which defines the UL() macro. Fixes build errors like these when using kvm_arm.h in assembly source files: Error: unexpected characters following instruction at operand 3 -- `and x0,x1,#((1U<<25)-1)' Acked-by: Mark Rutland Signed-off-by: Geoff Levand Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7fd3e27e3ccc..8afb863f5a9e 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -18,6 +18,7 @@ #ifndef __ARM64_KVM_ARM_H__ #define __ARM64_KVM_ARM_H__ +#include #include /* Hyp Configuration Register (HCR) bits */ @@ -160,9 +161,9 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (UL(48)) +#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ #define HSTR_EL2_TTEE (1 << 16) @@ -185,13 +186,13 @@ /* Exception Syndrome Register (ESR) bits */ #define ESR_EL2_EC_SHIFT (26) -#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT) -#define ESR_EL2_IL (1U << 25) +#define ESR_EL2_EC (UL(0x3f) << ESR_EL2_EC_SHIFT) +#define ESR_EL2_IL (UL(1) << 25) #define ESR_EL2_ISS (ESR_EL2_IL - 1) #define ESR_EL2_ISV_SHIFT (24) -#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT) +#define ESR_EL2_ISV (UL(1) << ESR_EL2_ISV_SHIFT) #define ESR_EL2_SAS_SHIFT (22) -#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT) +#define ESR_EL2_SAS (UL(3) << ESR_EL2_SAS_SHIFT) #define ESR_EL2_SSE (1 << 21) #define ESR_EL2_SRT_SHIFT (16) #define 
ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) @@ -205,16 +206,16 @@ #define ESR_EL2_FSC_TYPE (0x3c) #define ESR_EL2_CV_SHIFT (24) -#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT) +#define ESR_EL2_CV (UL(1) << ESR_EL2_CV_SHIFT) #define ESR_EL2_COND_SHIFT (20) -#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT) +#define ESR_EL2_COND (UL(0xf) << ESR_EL2_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_PERM (0x0c) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ -#define HPFAR_MASK (~0xFUL) +#define HPFAR_MASK (~UL(0xf)) #define ESR_EL2_EC_UNKNOWN (0x00) #define ESR_EL2_EC_WFI (0x01) From 12ac3efe74f888a13cb4df88b38bb01e8034dea8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Nov 2014 16:50:01 +0000 Subject: [PATCH 16/70] arm64/crypto: use crypto instructions to generate AES key schedule This patch implements the AES key schedule generation using ARMv8 Crypto Instructions. It replaces the table based C implementation in aes_generic.ko, which means we can drop the dependency on that module. Tested-by: Steve Capper Acked-by: Steve Capper Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/crypto/Kconfig | 5 +- arch/arm64/crypto/aes-ce-ccm-glue.c | 4 +- arch/arm64/crypto/aes-ce-cipher.c | 112 +++++++++++++++++++++++++++- arch/arm64/crypto/aes-ce-setkey.h | 5 ++ arch/arm64/crypto/aes-glue.c | 18 +++-- 5 files changed, 133 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/crypto/aes-ce-setkey.h diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 5562652c5316..a38b02ce5f9a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -27,20 +27,19 @@ config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES config CRYPTO_AES_ARM64_CE_CCM tristate "AES in CCM mode using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES + select CRYPTO_AES_ARM64_CE select CRYPTO_AEAD config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_BLKCIPHER - select CRYPTO_AES + select CRYPTO_AES_ARM64_CE select CRYPTO_ABLK_HELPER config CRYPTO_AES_ARM64_NEON_BLK diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 9e6cdde9b43d..0ac73b838fa3 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -16,6 +16,8 @@ #include #include +#include "aes-ce-setkey.h" + static int num_rounds(struct crypto_aes_ctx *ctx) { /* @@ -48,7 +50,7 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); int ret; - ret = crypto_aes_expand_key(ctx, in_key, key_len); + ret = ce_aes_expandkey(ctx, in_key, key_len); if (!ret) return 0; diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index 2075e1acae6b..ce47792a983d 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -14,6 +14,8 @@ #include #include +#include "aes-ce-setkey.h" + MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -124,6 +126,114 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_end(); } +/* + * aes_sub() - use the aese instruction to perform the AES sbox substitution + * on each byte in 'input' + */ +static u32 aes_sub(u32 
input) +{ + u32 ret; + + __asm__("dup v1.4s, %w[in] ;" + "movi v0.16b, #0 ;" + "aese v0.16b, v1.16b ;" + "umov %w[out], v0.4s[0] ;" + + : [out] "=r"(ret) + : [in] "r"(input) + : "v0","v1"); + + return ret; +} + +int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + /* + * The AES key schedule round constants + */ + static u8 const rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, + }; + + u32 kwords = key_len / sizeof(u32); + struct aes_block *key_enc, *key_dec; + int i, j; + + if (key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) + return -EINVAL; + + memcpy(ctx->key_enc, in_key, key_len); + ctx->key_length = key_len; + + kernel_neon_begin_partial(2); + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = aes_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } + } + + /* + * Generate the decryption keys for the Equivalent Inverse Cipher. + * This involves reversing the order of the round keys, and applying + * the Inverse Mix Columns transformation on all but the first and + * the last one. + */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + __asm__("ld1 {v0.16b}, %[in] ;" + "aesimc v1.16b, v0.16b ;" + "st1 {v1.16b}, %[out] ;" + + : [out] "=Q"(key_dec[i]) + : [in] "Q"(key_enc[j]) + : "v0","v1"); + key_dec[i] = key_enc[0]; + + kernel_neon_end(); + return 0; +} +EXPORT_SYMBOL(ce_aes_expandkey); + +int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} +EXPORT_SYMBOL(ce_aes_setkey); + static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-ce", @@ -135,7 +245,7 @@ static struct crypto_alg aes_alg = { .cra_cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = crypto_aes_set_key, + .cia_setkey = ce_aes_setkey, .cia_encrypt = aes_cipher_encrypt, .cia_decrypt = aes_cipher_decrypt } diff --git a/arch/arm64/crypto/aes-ce-setkey.h b/arch/arm64/crypto/aes-ce-setkey.h new file mode 100644 index 000000000000..f08a6471d034 --- /dev/null +++ b/arch/arm64/crypto/aes-ce-setkey.h @@ -0,0 +1,5 @@ + +int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len); +int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 79cd911ef88c..801aae32841f 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -16,9 +16,13 @@ #include #include +#include "aes-ce-setkey.h" + #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 +#define aes_setkey ce_aes_setkey +#define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define 
aes_ecb_decrypt ce_aes_ecb_decrypt #define aes_cbc_encrypt ce_aes_cbc_encrypt @@ -30,6 +34,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 +#define aes_setkey crypto_aes_set_key +#define aes_expandkey crypto_aes_expand_key #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt @@ -79,10 +85,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); int ret; - ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2); + ret = aes_expandkey(&ctx->key1, in_key, key_len / 2); if (!ret) - ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2], - key_len / 2); + ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2], + key_len / 2); if (!ret) return 0; @@ -288,7 +294,7 @@ static struct crypto_alg aes_algs[] = { { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = crypto_aes_set_key, + .setkey = aes_setkey, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, @@ -306,7 +312,7 @@ static struct crypto_alg aes_algs[] = { { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = crypto_aes_set_key, + .setkey = aes_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, @@ -324,7 +330,7 @@ static struct crypto_alg aes_algs[] = { { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = crypto_aes_set_key, + .setkey = aes_setkey, .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, }, From 80708677fab44a1489a089e162455dfb4dd722cd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 4 Nov 2014 10:50:16 +0000 Subject: [PATCH 17/70] arm64: log physical ID of boot CPU In certain debugging scenarios it's useful to know the physical ID (i.e. the MPIDR_EL1.Aff* fields) of the boot CPU, but we don't currently log this as we do for 32-bit ARM kernels. This patch makes the kernel log the physical ID of the boot CPU early in the boot process. The CPU logical map initialisation is folded into smp_setup_processor_id (which contrary to its name is also called by UP kernels). This is called before setup_arch, so should not adversely affect existing cpu_logical_map users. Acked-by: Sudeep Holla Acked-by: Catalin Marinas Acked-by: Lorenzo Pieralisi Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index cbeaa2f3844b..c73714b6aa96 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -116,12 +116,16 @@ void __init early_print(const char *str, ...)
void __init smp_setup_processor_id(void) { + u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_logical_map(0) = mpidr; + /* * clear __my_cpu_offset on boot CPU to avoid hang caused by * using percpu variable early, for example, lockdep will * access percpu variable inside lock_release */ set_my_cpu_offset(0); + pr_info("Booting Linux on physical CPU 0x%lx\n", (unsigned long)mpidr); } bool arch_match_cpu_phys_id(int cpu, u64 phys_id) @@ -399,7 +403,6 @@ void __init setup_arch(char **cmdline_p) psci_init(); - cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); From f1ba46ee787d0a880f884f401031315b0a777f25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 7 Nov 2014 14:12:33 +0000 Subject: [PATCH 18/70] arm64: ftrace: eliminate literal pool entries Replace ldr xN, = with adrp/add or adrp/ldr [as appropriate] in the implementation of _mcount(), which may be called very often. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-ftrace.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 38e704e597f7..08cafc518b9a 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -98,8 +98,8 @@ ENTRY(_mcount) mcount_enter - ldr x0, =ftrace_trace_function - ldr x2, [x0] + adrp x0, ftrace_trace_function + ldr x2, [x0, #:lo12:ftrace_trace_function] adr x0, ftrace_stub cmp x0, x2 // if (ftrace_trace_function b.eq skip_ftrace_call // != ftrace_stub) { @@ -115,14 +115,15 @@ skip_ftrace_call: // return; mcount_exit // return; // } skip_ftrace_call: - ldr x1, =ftrace_graph_return - ldr x2, [x1] // if ((ftrace_graph_return - cmp x0, x2 // != ftrace_stub) - b.ne ftrace_graph_caller + adrp x1, ftrace_graph_return + ldr x2, [x1, #:lo12:ftrace_graph_return] + cmp x0, x2 // if ((ftrace_graph_return + b.ne ftrace_graph_caller // != ftrace_stub) - ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry - ldr x2, [x1] // != ftrace_graph_entry_stub)) - ldr x0, =ftrace_graph_entry_stub + adrp x1, ftrace_graph_entry // || (ftrace_graph_entry + adrp x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) + ldr x2, [x1, #:lo12:ftrace_graph_entry] + add x0, x0, #:lo12:ftrace_graph_entry_stub cmp x0, x2 b.ne ftrace_graph_caller // ftrace_graph_caller(); From 302cd37c417d59549d073a8986fd028998409cb9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 7 Nov 2014 14:12:34 +0000 Subject: [PATCH 19/70] arm64: kvm: eliminate literal pool entries Replace two instances of 'ldr xN, =(constant)' in the world switch hot path with 'mov' instructions. 
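For context, this patch and the ftrace patch above remove the same construct: 'ldr xN, =expr' is assembled into a PC-relative load from a literal pool, costing an extra memory access on a hot path, whereas the replacements compute the value directly. Schematically (the first pair is taken from the ftrace change above, the second from the diff below; nothing here is new code):

	ldr	x0, =ftrace_trace_function	// old: address loaded from literal pool
	adrp	x0, ftrace_trace_function	// new: page address computed by adrp,
						//      low bits added via #:lo12: offsets

	ldr	x2, =(CPTR_EL2_TTA)		// old: constant loaded from literal pool
	mov	x2, #CPTR_EL2_TTA		// new: constant encoded in the instruction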
Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kvm/hyp.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index b72aa9f9215c..fbe909fb0a1a 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -761,10 +761,10 @@ .macro activate_traps ldr x2, [x0, #VCPU_HCR_EL2] msr hcr_el2, x2 - ldr x2, =(CPTR_EL2_TTA) + mov x2, #CPTR_EL2_TTA msr cptr_el2, x2 - ldr x2, =(1 << 15) // Trap CP15 Cr=15 + mov x2, #(1 << 15) // Trap CP15 Cr=15 msr hstr_el2, x2 mrs x2, mdcr_el2 From 44b82b7700d05a52cd983799d3ecde1a976b3bed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 24 Oct 2014 14:56:40 +0100 Subject: [PATCH 20/70] arm64: Fix up /proc/cpuinfo Commit d7a49086f263164a (arm64: cpuinfo: print info for all CPUs) attempted to clean up /proc/cpuinfo, but due to concerns regarding further changes was reverted in commit 5e39977edf6500fd (Revert "arm64: cpuinfo: print info for all CPUs"). There are two major issues with the arm64 /proc/cpuinfo format currently: * The "Features" line describes (only) the 64-bit hwcaps, which is problematic for some 32-bit applications which attempt to parse it. As the same names are used for analogous ISA features (e.g. aes) despite these generally being architecturally unrelated, it is not possible to simply append the 64-bit and 32-bit hwcaps in a manner that might not be misleading to some applications. Various potential solutions have appeared in vendor kernels. Typically the format of the Features line varies depending on whether the task is 32-bit. * Information is only printed regarding a single CPU. This does not match the ARM format, and does not provide sufficient information in big.LITTLE systems where CPUs are heterogeneous. The CPU information printed is queried from the current CPU's registers, which is racy w.r.t. cross-cpu migration. This patch attempts to solve these issues. The following changes are made: * When a task with a LINUX32 personality attempts to read /proc/cpuinfo, the "Features" line contains the decoded 32-bit hwcaps, as with the arm port. Otherwise, the decoded 64-bit hwcaps are shown. This aligns with the behaviour of COMPAT_UTS_MACHINE and COMPAT_ELF_PLATFORM. In the absence of compat support, the Features line is empty. The set of hwcaps injected into a task's auxval is unaffected. * Properties are printed per-cpu, as with the ARM port. The per-cpu information is queried from pre-recorded cpu information (as used by the sanity checks). * As with the previous attempt at fixing up /proc/cpuinfo, the hardware field is removed. The only users so far are 32-bit applications tied to particular boards, so no portable applications should be affected, and this should prevent future tying to particular boards. The following differences remain: * No model_name is printed, as this cannot be queried from the hardware and cannot be provided in a stable fashion. Use of the CPU {implementor,variant,part,revision} fields is sufficient to identify a CPU and is portable across arm and arm64. * The following system-wide properties are not provided, as they are not possible to provide generally. Programs relying on these are already tied to particular (32-bit only) boards: - Hardware - Revision - Serial No software has yet been identified for which these remaining differences are problematic.
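For illustration, the per-CPU record produced by the new c_show() below would look like the following for a 64-bit task (the values are hypothetical; a Cortex-A57 r0p0 with the full set of 64-bit hwcaps is assumed here):

	processor	: 0
	Features	: fp asimd evtstrm aes pmull sha1 sha2 crc32
	CPU implementer	: 0x41
	CPU architecture: 8
	CPU variant	: 0x0
	CPU part	: 0xd07
	CPU revision	: 0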
Cc: Greg Hackmann Cc: Ian Campbell Cc: Serban Constantinescu Cc: Will Deacon Cc: cross-distro@lists.linaro.org Cc: linux-api@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 97 +++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c73714b6aa96..831c97fe1ae9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -78,7 +79,6 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif static const char *cpu_name; -static const char *machine_name; phys_addr_t __fdt_pointer __initdata; /* @@ -315,8 +315,7 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } - machine_name = of_flat_dt_get_machine_name(); - dump_stack_set_arch_desc("%s (DT)", machine_name); + dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); } /* @@ -451,14 +450,50 @@ static const char *hwcap_str[] = { NULL }; +#ifdef CONFIG_COMPAT +static const char *compat_hwcap_str[] = { + "swp", + "half", + "thumb", + "26bit", + "fastmult", + "fpa", + "vfp", + "edsp", + "java", + "iwmmxt", + "crunch", + "thumbee", + "neon", + "vfpv3", + "vfpv3d16", + "tls", + "vfpv4", + "idiva", + "idivt", + "vfpd32", + "lpae", + "evtstrm" +}; + +static const char *compat_hwcap2_str[] = { + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + NULL +}; +#endif /* CONFIG_COMPAT */ + static int c_show(struct seq_file *m, void *v) { - int i; - - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + int i, j; for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; + /* * glibc reads /proc/cpuinfo to determine the number of * online processors, looking for lines beginning with @@ -467,25 +502,39 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif + + /* + * Dump out the common processor features in a single line. + * Userspace should read the hwcaps with getauxval(AT_HWCAP) + * rather than attempting to parse this, but there's a body of + * software which does already (at least for 32-bit). 
+ */ + seq_puts(m, "Features\t:"); + if (personality(current->personality) == PER_LINUX32) { +#ifdef CONFIG_COMPAT + for (j = 0; compat_hwcap_str[j]; j++) + if (compat_elf_hwcap & (1 << j)) + seq_printf(m, " %s", compat_hwcap_str[j]); + + for (j = 0; compat_hwcap2_str[j]; j++) + if (compat_elf_hwcap2 & (1 << j)) + seq_printf(m, " %s", compat_hwcap2_str[j]); +#endif /* CONFIG_COMPAT */ + } else { + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, " %s", hwcap_str[j]); + } + seq_puts(m, "\n"); + + seq_printf(m, "CPU implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "CPU architecture: 8\n"); + seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); } - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); - - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: AArch64\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); - seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); - - seq_printf(m, "Hardware\t: %s\n", machine_name); - return 0; } From d54e81f9af1d106e47ae8594903c43a80dae1a99 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Sep 2014 11:44:01 +0100 Subject: [PATCH 21/70] arm64: entry: avoid writing lr explicitly for constructing return paths Using an explicit adr instruction to set the link register to point at ret_fast_syscall/ret_to_user can defeat branch and return stack predictors. Instead, use the standard calling instructions (bl, blr) and have an unconditional branch as the following instruction. 
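Schematically, each return path changes as follows (the el0_da case, taken from the diff below):

	adr	lr, ret_to_user		// old: fabricate a return address,
	b	do_mem_abort		//      then tail-branch to the handler

	bl	do_mem_abort		// new: a real call, so the return stack
	b	ret_to_user		//      predictor sees a matched bl/ret pair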
Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 45 ++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 726b910fe6ec..2cebe56d650c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -455,8 +455,8 @@ el0_da: bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_mem_abort + bl do_mem_abort + b ret_to_user el0_ia: /* * Instruction abort handling @@ -468,8 +468,8 @@ el0_ia: mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp - adr lr, ret_to_user - b do_mem_abort + bl do_mem_abort + b ret_to_user el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access @@ -478,8 +478,8 @@ el0_fpsimd_acc: ct_user_exit mov x0, x25 mov x1, sp - adr lr, ret_to_user - b do_fpsimd_acc + bl do_fpsimd_acc + b ret_to_user el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception @@ -488,8 +488,8 @@ el0_fpsimd_exc: ct_user_exit mov x0, x25 mov x1, sp - adr lr, ret_to_user - b do_fpsimd_exc + bl do_fpsimd_exc + b ret_to_user el0_sp_pc: /* * Stack or PC alignment exception handling @@ -500,8 +500,8 @@ el0_sp_pc: mov x0, x26 mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_sp_pc_abort + bl do_sp_pc_abort + b ret_to_user el0_undef: /* * Undefined instruction @@ -510,8 +510,8 @@ el0_undef: enable_dbg_and_irq ct_user_exit mov x0, sp - adr lr, ret_to_user - b do_undefinstr + bl do_undefinstr + b ret_to_user el0_dbg: /* * Debug exception handling @@ -530,8 +530,8 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 - adr lr, ret_to_user - b bad_mode + bl bad_mode + b ret_to_user ENDPROC(el0_sync) .align 6 @@ -653,14 +653,15 @@ el0_svc_naked: // compat entry point ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys ldr x16, [stbl, scno, lsl #3] // address in the syscall table - br x16 // call sys_* routine + blr x16 // call sys_* routine + b ret_fast_syscall ni_sys: mov x0, sp - b do_ni_syscall + bl do_ni_syscall + b ret_fast_syscall ENDPROC(el0_svc) /* @@ -670,17 +671,16 @@ ENDPROC(el0_svc) __sys_trace: mov x0, sp bl syscall_trace_enter - adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs cmp scno, sc_nr // check upper syscall limit - b.hs ni_sys + b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] ldr x16, [stbl, scno, lsl #3] // address in the syscall table - br x16 // call sys_* routine + blr x16 // call sys_* routine __sys_trace_return: str x0, [sp] // save returned x0 @@ -688,6 +688,11 @@ __sys_trace_return: bl syscall_trace_exit b ret_to_user +__ni_sys_trace: + mov x0, sp + bl do_ni_syscall + b __sys_trace_return + /* * Special system call wrappers. */ From 63648dd20fa0780ab6c1e923b5c276d257422cb3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Sep 2014 12:26:41 +0100 Subject: [PATCH 22/70] arm64: entry: use ldp/stp instead of push/pop when saving/restoring regs The push/pop instructions can be suboptimal when saving/restoring large amounts of data to/from the stack, for example on entry/exit from the kernel. This is because: (1) They act on descending addresses (i.e. 
the newly decremented sp), which may defeat some hardware prefetchers (2) They introduce an implicit dependency between each instruction, as the sp has to be updated in order to resolve the address of the next access. This patch removes the push/pop instructions from our kernel entry/exit macros in favour of ldp/stp plus offset. Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 81 +++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2cebe56d650c..622a409916f3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -64,25 +64,26 @@ #define BAD_ERROR 3 .macro kernel_entry, el, regsize = 64 - sub sp, sp, #S_FRAME_SIZE - S_LR // room for LR, SP, SPSR, ELR + sub sp, sp, #S_FRAME_SIZE .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 .endif - push x28, x29 - push x26, x27 - push x24, x25 - push x22, x23 - push x20, x21 - push x18, x19 - push x16, x17 - push x14, x15 - push x12, x13 - push x10, x11 - push x8, x9 - push x6, x7 - push x4, x5 - push x2, x3 - push x0, x1 + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + .if \el == 0 mrs x21, sp_el0 get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, @@ -118,33 +119,31 @@ .if \el == 0 ct_user_enter ldr x23, [sp, #S_SP] // load return stack pointer - .endif - .if \ret - ldr x1, [sp, #S_X1] // preserve x0 (syscall return) - add sp, sp, S_X2 - .else - pop x0, x1 - .endif - pop x2, x3 // load the rest of the registers - pop x4, x5 - pop x6, x7 - pop x8, x9 - msr elr_el1, x21 // set up the return data - msr spsr_el1, x22 - .if \el == 0 msr sp_el0, x23 .endif - pop x10, x11 - pop x12, x13 - pop x14, x15 - pop x16, x17 - pop x18, x19 - pop x20, x21 - pop x22, x23 - pop x24, x25 - pop x26, x27 - pop x28, x29 - ldr lr, [sp], #S_FRAME_SIZE - S_LR // load LR and restore SP + msr elr_el1, x21 // set up the return data + msr spsr_el1, x22 + .if \ret + ldr x1, [sp, #S_X1] // preserve x0 (syscall return) + .else + ldp x0, x1, [sp, #16 * 0] + .endif + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] + ldp x6, x7, [sp, #16 * 3] + ldp x8, x9, [sp, #16 * 4] + ldp x10, x11, [sp, #16 * 5] + ldp x12, x13, [sp, #16 * 6] + ldp x14, x15, [sp, #16 * 7] + ldp x16, x17, [sp, #16 * 8] + ldp x18, x19, [sp, #16 * 9] + ldp x20, x21, [sp, #16 * 10] + ldp x22, x23, [sp, #16 * 11] + ldp x24, x25, [sp, #16 * 12] + ldp x26, x27, [sp, #16 * 13] + ldp x28, x29, [sp, #16 * 14] + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp eret // return to kernel .endm From fb7332a9fedfd62b1ba6530c86f39f0fa38afd49 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Oct 2014 10:03:09 +0000 Subject: [PATCH 23/70] mmu_gather: move minimal range calculations into generic code On architectures with hardware broadcasting of TLB invalidation messages, it makes sense to reduce the range of the mmu_gather structure when unmapping page ranges based on the dirty address information passed to tlb_remove_tlb_entry.
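Concretely, the bookkeeping being centralised is tiny: each unmapped entry grows a pending [start, end) flush window, as in the helper the generic code gains in the hunks below:

    static inline void __tlb_adjust_range(struct mmu_gather *tlb,
                                          unsigned long address)
    {
            tlb->start = min(tlb->start, address);
            tlb->end = max(tlb->end, address + PAGE_SIZE);
    }

Once the window is reset after every flush, an empty window (tlb->end == 0) can stand in for the old need_flush flag.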
arm64 already does this by directly manipulating the start/end fields of the gather structure, but this confuses the generic code which does not expect these fields to change and can end up calculating invalid, negative ranges when forcing a flush in zap_pte_range. This patch moves the minimal range calculation out of the arm64 code and into the generic implementation, simplifying zap_pte_range in the process (which no longer needs to care about start/end, since they will point to the appropriate ranges already). With the range being tracked by core code, the need_flush flag is dropped in favour of checking that the end of the range has actually been set. Cc: Benjamin Herrenschmidt Cc: Peter Zijlstra Cc: Russell King - ARM Linux Cc: Michal Simek Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlb.h | 67 ++---------------------------- arch/microblaze/include/asm/tlb.h | 3 +- arch/powerpc/include/asm/pgalloc.h | 3 +- arch/powerpc/include/asm/tlb.h | 1 + arch/powerpc/mm/hugetlbpage.c | 2 - include/asm-generic/tlb.h | 57 ++++++++++++++++++++----- mm/memory.c | 30 ++++--------- 7 files changed, 63 insertions(+), 100 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a82c0c5c8b52..c028fe37456f 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -19,10 +19,6 @@ #ifndef __ASM_TLB_H #define __ASM_TLB_H -#define __tlb_remove_pmd_tlb_entry __tlb_remove_pmd_tlb_entry - -#include - #include #include @@ -37,71 +33,22 @@ static inline void __tlb_remove_table(void *_table) #define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ -/* - * There's three ways the TLB shootdown code is used: - * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region(). - * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called. - * 2. Unmapping all vmas. See exit_mmap(). - * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called. - * Page tables will be freed. - * 3. Unmapping argument pages. See shift_arg_pages(). - * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called. - */ +#include + static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm) { flush_tlb_mm(tlb->mm); - } else if (tlb->end > 0) { + } else { struct vm_area_struct vma = { .vm_mm = tlb->mm, }; flush_tlb_range(&vma, tlb->start, tlb->end); - tlb->start = TASK_SIZE; - tlb->end = 0; } } -static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) -{ - if (!tlb->fullmm) { - tlb->start = min(tlb->start, addr); - tlb->end = max(tlb->end, addr + PAGE_SIZE); - } -} - -/* - * Memorize the range for the TLB flush. - */ -static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, - unsigned long addr) -{ - tlb_add_flush(tlb, addr); -} - -/* - * In the case of tlb vma handling, we can optimise these away in the - * case where we're doing a full MM flush. When we're doing a munmap, - * the vmas are adjusted to only cover the region to be torn down. 
- */ -static inline void tlb_start_vma(struct mmu_gather *tlb, - struct vm_area_struct *vma) -{ - if (!tlb->fullmm) { - tlb->start = TASK_SIZE; - tlb->end = 0; - } -} - -static inline void tlb_end_vma(struct mmu_gather *tlb, - struct vm_area_struct *vma) -{ - if (!tlb->fullmm) - tlb_flush(tlb); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { pgtable_page_dtor(pte); - tlb_add_flush(tlb, addr); tlb_remove_entry(tlb, pte); } @@ -109,7 +56,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - tlb_add_flush(tlb, addr); tlb_remove_entry(tlb, virt_to_page(pmdp)); } #endif @@ -118,15 +64,8 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_add_flush(tlb, addr); tlb_remove_entry(tlb, virt_to_page(pudp)); } #endif -static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, - unsigned long address) -{ - tlb_add_flush(tlb, address); -} - #endif diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h index 8aa97817cc8c..99b6ded54849 100644 --- a/arch/microblaze/include/asm/tlb.h +++ b/arch/microblaze/include/asm/tlb.h @@ -14,7 +14,6 @@ #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) #include -#include #ifdef CONFIG_MMU #define tlb_start_vma(tlb, vma) do { } while (0) @@ -22,4 +21,6 @@ #define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) #endif +#include + #endif /* _ASM_MICROBLAZE_TLB_H */ diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index e9a9f60e596d..fc3ee06eab87 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -3,7 +3,6 @@ #ifdef __KERNEL__ #include -#include #ifdef CONFIG_PPC_BOOK3E extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address); @@ -14,6 +13,8 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, } #endif /* !CONFIG_PPC_BOOK3E */ +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + #ifdef CONFIG_PPC64 #include #else diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index e2b428b0f7ba..20733fa518ae 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -27,6 +27,7 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry extern void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..6a4a5fcb9730 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -517,8 +517,6 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif for (i = 0; i < num_hugepd; i++, hpdp++) hpdp->pd = 0; - tlb->need_flush = 1; - #ifdef CONFIG_PPC_FSL_BOOK3E hugepd_free(tlb, hugepte); #else diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 5672d7ea1fa0..08848050922e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -96,10 +96,9 @@ struct mmu_gather { #endif unsigned long start; unsigned long end; - unsigned int need_flush : 1, /* Did free PTEs */ /* we are in the middle of an operation to clear * a full mm and can make some optimizations */ - fullmm : 1, + unsigned int fullmm : 1, /* we have performed an operation 
which * requires a complete flush of the tlb */ need_flush_all : 1; @@ -128,16 +127,54 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) tlb_flush_mmu(tlb); } +static inline void __tlb_adjust_range(struct mmu_gather *tlb, + unsigned long address) +{ + tlb->start = min(tlb->start, address); + tlb->end = max(tlb->end, address + PAGE_SIZE); +} + +static inline void __tlb_reset_range(struct mmu_gather *tlb) +{ + tlb->start = TASK_SIZE; + tlb->end = 0; +} + +/* + * In the case of tlb vma handling, we can optimise these away in the + * case where we're doing a full MM flush. When we're doing a munmap, + * the vmas are adjusted to only cover the region to be torn down. + */ +#ifndef tlb_start_vma +#define tlb_start_vma(tlb, vma) do { } while (0) +#endif + +#define __tlb_end_vma(tlb, vma) \ + do { \ + if (!tlb->fullmm && tlb->end) { \ + tlb_flush(tlb); \ + __tlb_reset_range(tlb); \ + } \ + } while (0) + +#ifndef tlb_end_vma +#define tlb_end_vma __tlb_end_vma +#endif + +#ifndef __tlb_remove_tlb_entry +#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +#endif + /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * - * Record the fact that pte's were really umapped in ->need_flush, so we can - * later optimise away the tlb invalidate. This helps when userspace is - * unmapping already-unmapped pages, which happens quite a lot. + * Record the fact that pte's were really unmapped by updating the range, + * so we can later optimise away the tlb invalidate. This helps when + * userspace is unmapping already-unmapped pages, which happens quite a lot. */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) @@ -151,27 +188,27 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) #define pte_free_tlb(tlb, ptep, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) diff --git a/mm/memory.c b/mm/memory.c index 1cc6bfbd872e..c71edae9ba44 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -220,9 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long /* Is it from 0 to ~0? 
*/ tlb->fullmm = !(start | (end+1)); tlb->need_flush_all = 0; - tlb->start = start; - tlb->end = end; - tlb->need_flush = 0; tlb->local.next = NULL; tlb->local.nr = 0; tlb->local.max = ARRAY_SIZE(tlb->__pages); @@ -232,15 +229,20 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb->batch = NULL; #endif + + __tlb_reset_range(tlb); } static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { - tlb->need_flush = 0; + if (!tlb->end) + return; + tlb_flush(tlb); #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif + __tlb_reset_range(tlb); } static void tlb_flush_mmu_free(struct mmu_gather *tlb) @@ -256,8 +258,6 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->need_flush) - return; tlb_flush_mmu_tlbonly(tlb); tlb_flush_mmu_free(tlb); } @@ -292,7 +292,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { struct mmu_gather_batch *batch; - VM_BUG_ON(!tlb->need_flush); + VM_BUG_ON(!tlb->end); batch = tlb->active; batch->pages[batch->nr++] = page; @@ -359,8 +359,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; - tlb->need_flush = 1; - /* * When there's less than two users of this mm there cannot be a * concurrent page-table walk. @@ -1185,20 +1183,8 @@ again: arch_leave_lazy_mmu_mode(); /* Do the actual TLB flush before dropping ptl */ - if (force_flush) { - unsigned long old_end; - - /* - * Flush the TLB just for the previous segment, - * then update the range to be the remaining - * TLB range. - */ - old_end = tlb->end; - tlb->end = addr; + if (force_flush) tlb_flush_mmu_tlbonly(tlb); - tlb->start = addr; - tlb->end = old_end; } pte_unmap_unlock(start_pte, ptl); /* From 7d57511d2dba03a8046c8b428dd9192a4bfc1e73 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 17 Nov 2014 10:37:40 +0000 Subject: [PATCH 24/70] arm64: Add COMPAT_HWCAP_LPAE Commit a469abd0f868 (ARM: elf: add new hwcap for identifying atomic ldrd/strd instructions) introduces HWCAP_LPAE for 32-bit ARM applications. As LPAE is always present on arm64, report the corresponding compat HWCAP to user space.
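For illustration only (a hypothetical userspace probe, not part of this patch), a 32-bit binary could test the new bit via the auxiliary vector; the HWCAP_LPAE value here is the arm definition, (1 << 20), matching the COMPAT_HWCAP_LPAE added in the diff below:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP_LPAE
    #define HWCAP_LPAE (1 << 20)    /* mirrors COMPAT_HWCAP_LPAE below */
    #endif

    int main(void)
    {
            /* for a 32-bit task on arm64, AT_HWCAP reports compat_elf_hwcap */
            if (getauxval(AT_HWCAP) & HWCAP_LPAE)
                    puts("atomic ldrd/strd (LPAE) available");
            return 0;
    }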
Signed-off-by: Catalin Marinas Cc: stable@vger.kernel.org # 3.11+ Signed-off-by: Will Deacon --- arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/kernel/setup.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 024c46183c3c..0ad735166d9f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -30,6 +30,7 @@ #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) #define COMPAT_HWCAP2_AES (1 << 0) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 831c97fe1ae9..2e17bd3806c8 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -73,7 +73,8 @@ EXPORT_SYMBOL_GPL(elf_hwcap); COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif From d6c763afab142a85e4770b4bc2a5f40f256d5c5d Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 17 Nov 2014 23:02:19 +0000 Subject: [PATCH 25/70] arm64/mm: Remove hack in mmap randomize layout Since commit 8a0a9bd4db63 ('random: make get_random_int() more random'), get_random_int() returns a random value for each call, so the comment and hack introduced in mmap_rnd() as part of commit 1d18c47c735e ('arm64: MMU fault handling and page table management') are incorrect. Commit 1d18c47c735e seems to use the same hack introduced by commit a5adc91a4b44 ('powerpc: Ensure random space between stack and mmaps'), later copied in commit 5a0efea09f42 ('sparc64: Sharpen address space randomization calculations.'). But both architectures were cleaned up as part of commit fa8cbaaf5a68 ('powerpc+sparc64/mm: Remove hack in mmap randomize layout') as the hack is no longer needed since commit 8a0a9bd4db63. So the present patch removes the comment and the hack around get_random_int() on AArch64's mmap_rnd(). Cc: David S. Miller Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Acked-by: Will Deacon Acked-by: Dan McGee Signed-off-by: Yann Droneaud Signed-off-by: Will Deacon --- arch/arm64/mm/mmap.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 1d73662f00ff..54922d1275b8 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -47,22 +47,14 @@ static int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -/* - * Since get_random_int() returns the same value within a 1 jiffy window, we - * will almost always get the same randomisation for the stack and mmap - * region. This will mean the relative distance between stack and mmap will be - * the same. - * - * To avoid this we can shift the randomness by 1 bit.
- */ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; if (current->flags & PF_RANDOMIZE) - rnd = (long)get_random_int() & (STACK_RND_MASK >> 1); + rnd = (long)get_random_int() & STACK_RND_MASK; - return rnd << (PAGE_SHIFT + 1); + return rnd << PAGE_SHIFT; } static unsigned long mmap_base(void) From 15670ef1eac9817cf48da12c885aabcdd88e9add Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Nov 2014 17:44:12 +0000 Subject: [PATCH 26/70] arm64: pgalloc: consistently use PGALLOC_GFP We currently allocate different levels of page tables with a variety of differing flags, and the PGALLOC_GFP flags, intended for use when allocating any level of page table, are only used for ptes in pte_alloc_one. On x86, PGALLOC_GFP is used for all page table allocations. Currently the major differences are: * __GFP_NOTRACK -- Needed to ensure page tables are always accessible in the presence of kmemcheck to prevent recursive faults. Currently kmemcheck cannot be selected for arm64. * __GFP_REPEAT -- Causes the allocator to try to reclaim pages and retry upon a failure to allocate. * __GFP_ZERO -- Sometimes passed explicitly, sometimes zalloc variants are used. While we've not encountered issues so far, it would be preferable to be consistent. This patch ensures all levels of table are allocated in the same manner, with PGALLOC_GFP. Cc: Steve Capper Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgalloc.h | 8 ++++---- arch/arm64/mm/pgd.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index d5bed02073d6..e20df38a8ff3 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,11 +26,13 @@ #define check_pgt_cache() do { } while (0) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + #if CONFIG_ARM64_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)__get_free_page(PGALLOC_GFP); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -50,7 +52,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pud_t *)__get_free_page(PGALLOC_GFP); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -69,8 +71,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) - static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 6682b361d3ac..71ca104f97bd 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -35,9 +35,9 @@ static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) { if (PGD_SIZE == PAGE_SIZE) - return (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)__get_free_page(PGALLOC_GFP); else - return kmem_cache_zalloc(pgd_cache, GFP_KERNEL); + return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) From f97fc810798c261b2790c2a1660461a508a479e0 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 19 Nov
2014 16:53:43 +0000 Subject: [PATCH 27/70] arm64: percpu: Implement this_cpu operations The generic this_cpu operations disable interrupts to ensure that the requested operation is protected from pre-emption. For arm64, this is overkill and can hurt throughput and latency. This patch provides arm64 specific implementations for the this_cpu operations. Rather than disable interrupts, we use the exclusive monitor or atomic operations as appropriate. The following operations are implemented: add, add_return, and, or, read, write, xchg. We also wire up a cmpxchg implementation from cmpxchg.h. Testing was performed using the percpu_test module and hackbench on a Juno board running 3.18-rc4. Signed-off-by: Steve Capper Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 6 +- arch/arm64/include/asm/percpu.h | 215 +++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 89e397befad5..cb9593079f29 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -246,8 +246,10 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define this_cpu_cmpxchg_8(ptr, o, n) \ - cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 5279e5733386..09da25bc596f 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -44,6 +44,221 @@ static inline unsigned long __my_cpu_offset(void) #endif /* CONFIG_SMP */ +#define PERCPU_OP(op, asm_op) \ +static inline unsigned long __percpu_##op(void *ptr, \ + unsigned long val, int size) \ +{ \ + unsigned long loop, ret; \ + \ + switch (size) { \ + case 1: \ + do { \ + asm ("//__per_cpu_" #op "_1\n" \ + "ldxrb %w[ret], %[ptr]\n" \ + #asm_op " %w[ret], %w[ret], %w[val]\n" \ + "stxrb %w[loop], %w[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u8 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + case 2: \ + do { \ + asm ("//__per_cpu_" #op "_2\n" \ + "ldxrh %w[ret], %[ptr]\n" \ + #asm_op " %w[ret], %w[ret], %w[val]\n" \ + "stxrh %w[loop], %w[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u16 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + case 4: \ + do { \ + asm ("//__per_cpu_" #op "_4\n" \ + "ldxr %w[ret], %[ptr]\n" \ + #asm_op " %w[ret], %w[ret], %w[val]\n" \ + "stxr %w[loop], %w[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u32 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + case 8: \ + do { \ + asm ("//__per_cpu_" #op "_8\n" \ + "ldxr %[ret], %[ptr]\n" \ + #asm_op " %[ret], %[ret], %[val]\n" \ + "stxr %w[loop], %[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u64 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + \ + return ret; \ +} + +PERCPU_OP(add, add) +PERCPU_OP(and, and) 
+PERCPU_OP(or, orr) +#undef PERCPU_OP + +static inline unsigned long __percpu_read(void *ptr, int size) +{ + unsigned long ret; + + switch (size) { + case 1: + ret = ACCESS_ONCE(*(u8 *)ptr); + break; + case 2: + ret = ACCESS_ONCE(*(u16 *)ptr); + break; + case 4: + ret = ACCESS_ONCE(*(u32 *)ptr); + break; + case 8: + ret = ACCESS_ONCE(*(u64 *)ptr); + break; + default: + BUILD_BUG(); + } + + return ret; +} + +static inline void __percpu_write(void *ptr, unsigned long val, int size) +{ + switch (size) { + case 1: + ACCESS_ONCE(*(u8 *)ptr) = (u8)val; + break; + case 2: + ACCESS_ONCE(*(u16 *)ptr) = (u16)val; + break; + case 4: + ACCESS_ONCE(*(u32 *)ptr) = (u32)val; + break; + case 8: + ACCESS_ONCE(*(u64 *)ptr) = (u64)val; + break; + default: + BUILD_BUG(); + } +} + +static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) +{ + unsigned long ret, loop; + + switch (size) { + case 1: + do { + asm ("//__percpu_xchg_1\n" + "ldxrb %w[ret], %[ptr]\n" + "stxrb %w[loop], %w[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u8 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + case 2: + do { + asm ("//__percpu_xchg_2\n" + "ldxrh %w[ret], %[ptr]\n" + "stxrh %w[loop], %w[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u16 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + case 4: + do { + asm ("//__percpu_xchg_4\n" + "ldxr %w[ret], %[ptr]\n" + "stxr %w[loop], %w[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u32 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + case 8: + do { + asm ("//__percpu_xchg_8\n" + "ldxr %[ret], %[ptr]\n" + "stxr %w[loop], %[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u64 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + default: + BUILD_BUG(); + } + + return ret; +} + +#define _percpu_add(pcp, val) \ + __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + +#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val)) + +#define _percpu_and(pcp, val) \ + __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + +#define _percpu_or(pcp, val) \ + __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + +#define _percpu_read(pcp) (typeof(pcp)) \ + (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp))) + +#define _percpu_write(pcp, val) \ + __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)) + +#define _percpu_xchg(pcp, val) (typeof(pcp)) \ + (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))) + +#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val) + +#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val) + +#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val) + +#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) + +#define this_cpu_read_1(pcp) _percpu_read(pcp) +#define 
this_cpu_read_2(pcp) _percpu_read(pcp) +#define this_cpu_read_4(pcp) _percpu_read(pcp) +#define this_cpu_read_8(pcp) _percpu_read(pcp) + +#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) + +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) + #include #endif /* __ASM_PERCPU_H */ From 9b79f52d1a702dd5b160f9d2ee0199c3122809bb Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:22 +0000 Subject: [PATCH 28/70] arm64: Add support for hooks to handle undefined instructions Add support to register hooks for undefined instructions. The handlers will be called when the undefined instruction and the processor state (as contained in pstate) match criteria used at registration. Signed-off-by: Punit Agrawal Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 2 ++ arch/arm64/include/asm/traps.h | 16 +++++++++ arch/arm64/kernel/insn.c | 5 +++ arch/arm64/kernel/traps.c | 66 ++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 56a9e63b6c33..1bb043018315 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -354,6 +354,8 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); + +bool aarch32_insn_is_wide(u32 insn); #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 10ca8ff93cc2..232e4ba5d314 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -18,6 +18,22 @@ #ifndef __ASM_TRAP_H #define __ASM_TRAP_H +#include + +struct pt_regs; + +struct undef_hook { + struct list_head node; + u32 instr_mask; + u32 instr_val; + u64 pstate_mask; + u64 pstate_val; + int (*fn)(struct pt_regs *regs, u32 instr); +}; + +void register_undef_hook(struct undef_hook *hook); +void unregister_undef_hook(struct undef_hook *hook); + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index e007714ded04..ab00eb58d385 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -959,3 +959,8 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift); } + +bool aarch32_insn_is_wide(u32 insn) +{ + return insn >= 0xe800; +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index de1b085e7963..0a801e3743d5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -259,6 +259,69 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, } } +static LIST_HEAD(undef_hook); +static DEFINE_RAW_SPINLOCK(undef_lock); + +void register_undef_hook(struct undef_hook *hook) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&undef_lock, flags); + list_add(&hook->node, &undef_hook); + raw_spin_unlock_irqrestore(&undef_lock, flags); +} + +void 
unregister_undef_hook(struct undef_hook *hook) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&undef_lock, flags); + list_del(&hook->node); + raw_spin_unlock_irqrestore(&undef_lock, flags); +} + +static int call_undef_hook(struct pt_regs *regs) +{ + struct undef_hook *hook; + unsigned long flags; + u32 instr; + int (*fn)(struct pt_regs *regs, u32 instr) = NULL; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!user_mode(regs)) + return 1; + + if (compat_thumb_mode(regs)) { + /* 16-bit Thumb instruction */ + if (get_user(instr, (u16 __user *)pc)) + goto exit; + instr = le16_to_cpu(instr); + if (aarch32_insn_is_wide(instr)) { + u32 instr2; + + if (get_user(instr2, (u16 __user *)(pc + 2))) + goto exit; + instr2 = le16_to_cpu(instr2); + instr = (instr << 16) | instr2; + } + } else { + /* 32-bit ARM instruction */ + if (get_user(instr, (u32 __user *)pc)) + goto exit; + instr = le32_to_cpu(instr); + } + + raw_spin_lock_irqsave(&undef_lock, flags); + list_for_each_entry(hook, &undef_hook, node) + if ((instr & hook->instr_mask) == hook->instr_val && + (regs->pstate & hook->pstate_mask) == hook->pstate_val) + fn = hook->fn; + + raw_spin_unlock_irqrestore(&undef_lock, flags); +exit: + return fn ? fn(regs, instr) : 1; +} + asmlinkage void __exception do_undefinstr(struct pt_regs *regs) { siginfo_t info; @@ -268,6 +331,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (!aarch32_break_handler(regs)) return; + if (call_undef_hook(regs) == 0) + return; + if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", From 0be0e44c182c4f13df13903fd1377671d157d7b7 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:23 +0000 Subject: [PATCH 29/70] arm64: Add AArch32 instruction set condition code checks Port support for AArch32 instruction condition code checking from arm to arm64. Signed-off-by: Punit Agrawal Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/opcodes.h | 1 + arch/arm64/kernel/Makefile | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/opcodes.h diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h new file mode 100644 index 000000000000..4e603ea36ad3 --- /dev/null +++ b/arch/arm64/include/asm/opcodes.h @@ -0,0 +1 @@ +#include <../../arm/include/asm/opcodes.h> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 5bd029b43644..a4d8671d6d11 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,7 +18,8 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ cpuinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o + sys_compat.o \ + ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o From 587064b610c703f259317d00dc37bf6d40f4fc74 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:24 +0000 Subject: [PATCH 30/70] arm64: Add framework for legacy instruction emulation Typically, providing support for legacy instructions requires emulating the behaviour of instructions whose encodings have become undefined. If the instructions haven't been removed from the architecture, there may be an option in the implementation to turn on/off the support for these instructions.
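The undefined instruction hooks added earlier in this series are the building block here. For orientation, a consumer registers a matcher plus handler roughly as follows (a sketch only; the mask/value pair shown is the SWP{B} encoding used by a later patch in this series):

    static int my_handler(struct pt_regs *regs, u32 instr)
    {
            /* emulate instr, then step past it */
            regs->pc += 4;
            return 0;
    }

    static struct undef_hook my_hook = {
            .instr_mask  = 0x0fb00ff0,
            .instr_val   = 0x01000090,
            .pstate_mask = COMPAT_PSR_MODE_MASK,
            .pstate_val  = COMPAT_PSR_MODE_USR,
            .fn          = my_handler,
    };

    register_undef_hook(&my_hook);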
Create common infrastructure to support legacy instruction emulation. In addition to emulation, also provide an option to support hardware execution when supported. The default execution mode (one of undef, emulate, hw execution) is dependent on the state of the instruction (deprecated or obsolete) in the architecture and can be specified at the time of registering the instruction handlers. The runtime state of the emulation can be controlled by writing to individual nodes in sysctl. The expected default behaviour is documented as part of this patch. Reviewed-by: Catalin Marinas Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- Documentation/arm64/legacy_instructions.txt | 33 +++ arch/arm64/Kconfig | 18 ++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/armv8_deprecated.c | 216 ++++++++++++++++++++ 4 files changed, 268 insertions(+) create mode 100644 Documentation/arm64/legacy_instructions.txt create mode 100644 arch/arm64/kernel/armv8_deprecated.c diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt new file mode 100644 index 000000000000..49d4867c0c09 --- /dev/null +++ b/Documentation/arm64/legacy_instructions.txt @@ -0,0 +1,33 @@ +The arm64 port of the Linux kernel provides infrastructure to support +emulation of instructions which have been deprecated or obsoleted in +the architecture. The infrastructure code uses undefined instruction +hooks to support emulation. Where available it also allows turning on +the instruction execution in hardware. + +The emulation mode can be controlled by writing to sysctl nodes +(/proc/sys/abi). The following explains the different execution +behaviours and the corresponding values of the sysctl nodes - + +* Undef + Value: 0 + Generates undefined instruction abort. Default for instructions that + have been obsoleted in the architecture, e.g., SWP + +* Emulate + Value: 1 + Uses software emulation. To aid migration of software, in this mode + usage of the emulated instructions is traced and rate-limited + warnings are issued. This is the default for deprecated + instructions, e.g., CP15 barriers + +* Hardware Execution + Value: 2 + Although marked as deprecated, some implementations may support the + enabling/disabling of hardware support for the execution of these + instructions. Using hardware execution generally provides better + performance, but at the loss of ability to gather runtime statistics + about the use of the deprecated instructions. + +The default mode depends on the status of the instruction in the +architecture. Deprecated instructions should default to emulation +while obsolete instructions must be undefined by default. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1e0e4671dd25..aa8f4bea3738 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -164,6 +164,24 @@ config ARCH_XGENE help This enables support for AppliedMicro X-Gene SOC Family +comment "Processor Features" + +menuconfig ARMV8_DEPRECATED + bool "Emulate deprecated/obsolete ARMv8 instructions" + depends on COMPAT + help + Legacy software support may require certain instructions + that have been deprecated or obsoleted in the architecture. + + Enable this option for selective emulation of these + features.
+ + If unsure, say Y + +if ARMV8_DEPRECATED + +endif + endmenu menu "Bus support" diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a4d8671d6d11..84e9e5153efe 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -32,6 +32,7 @@ arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o arm64-obj-$(CONFIG_PCI) += pci.o +arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c new file mode 100644 index 000000000000..db3b79da6a48 --- /dev/null +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2014 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include + +/* + * The runtime support for deprecated instruction support can be in one of + * following three states - + * + * 0 = undef + * 1 = emulate (software emulation) + * 2 = hw (supported in hardware) + */ +enum insn_emulation_mode { + INSN_UNDEF, + INSN_EMULATE, + INSN_HW, +}; + +enum legacy_insn_status { + INSN_DEPRECATED, + INSN_OBSOLETE, +}; + +struct insn_emulation_ops { + const char *name; + enum legacy_insn_status status; + struct undef_hook *hooks; + int (*set_hw_mode)(bool enable); +}; + +struct insn_emulation { + struct list_head node; + struct insn_emulation_ops *ops; + int current_mode; + int min; + int max; +}; + +static LIST_HEAD(insn_emulation); +static int nr_insn_emulated; +static DEFINE_RAW_SPINLOCK(insn_emulation_lock); + +static void register_emulation_hooks(struct insn_emulation_ops *ops) +{ + struct undef_hook *hook; + + BUG_ON(!ops->hooks); + + for (hook = ops->hooks; hook->instr_mask; hook++) + register_undef_hook(hook); + + pr_notice("Registered %s emulation handler\n", ops->name); +} + +static void remove_emulation_hooks(struct insn_emulation_ops *ops) +{ + struct undef_hook *hook; + + BUG_ON(!ops->hooks); + + for (hook = ops->hooks; hook->instr_mask; hook++) + unregister_undef_hook(hook); + + pr_notice("Removed %s emulation handler\n", ops->name); +} + +static int update_insn_emulation_mode(struct insn_emulation *insn, + enum insn_emulation_mode prev) +{ + int ret = 0; + + switch (prev) { + case INSN_UNDEF: /* Nothing to be done */ + break; + case INSN_EMULATE: + remove_emulation_hooks(insn->ops); + break; + case INSN_HW: + if (insn->ops->set_hw_mode) { + insn->ops->set_hw_mode(false); + pr_notice("Disabled %s support\n", insn->ops->name); + } + break; + } + + switch (insn->current_mode) { + case INSN_UNDEF: + break; + case INSN_EMULATE: + register_emulation_hooks(insn->ops); + break; + case INSN_HW: + if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(true)) + pr_notice("Enabled %s support\n", insn->ops->name); + else + ret = -EINVAL; + break; + } + + return ret; +} + +static void register_insn_emulation(struct insn_emulation_ops *ops) +{ + unsigned long flags; + struct insn_emulation *insn; + + insn = kzalloc(sizeof(*insn), GFP_KERNEL); + insn->ops = ops; + insn->min = INSN_UNDEF; + + switch (ops->status) { + case INSN_DEPRECATED: + insn->current_mode = INSN_EMULATE; + insn->max = INSN_HW; + break; + case INSN_OBSOLETE: + insn->current_mode = INSN_UNDEF; + insn->max = INSN_EMULATE; + break; + } + + 
raw_spin_lock_irqsave(&insn_emulation_lock, flags); + list_add(&insn->node, &insn_emulation); + nr_insn_emulated++; + raw_spin_unlock_irqrestore(&insn_emulation_lock, flags); + + /* Register any handlers if required */ + update_insn_emulation_mode(insn, INSN_UNDEF); +} + +static int emulation_proc_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret = 0; + struct insn_emulation *insn = (struct insn_emulation *) table->data; + enum insn_emulation_mode prev_mode = insn->current_mode; + + table->data = &insn->current_mode; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (ret || !write || prev_mode == insn->current_mode) + goto ret; + + ret = update_insn_emulation_mode(insn, prev_mode); + if (ret) { + /* Mode change failed, revert to previous mode. */ + insn->current_mode = prev_mode; + update_insn_emulation_mode(insn, INSN_UNDEF); + } +ret: + table->data = insn; + return ret; +} + +static struct ctl_table ctl_abi[] = { + { + .procname = "abi", + .mode = 0555, + }, + { } +}; + +static void register_insn_emulation_sysctl(struct ctl_table *table) +{ + unsigned long flags; + int i = 0; + struct insn_emulation *insn; + struct ctl_table *insns_sysctl, *sysctl; + + insns_sysctl = kzalloc(sizeof(*sysctl) * (nr_insn_emulated + 1), + GFP_KERNEL); + + raw_spin_lock_irqsave(&insn_emulation_lock, flags); + list_for_each_entry(insn, &insn_emulation, node) { + sysctl = &insns_sysctl[i]; + + sysctl->mode = 0644; + sysctl->maxlen = sizeof(int); + + sysctl->procname = insn->ops->name; + sysctl->data = insn; + sysctl->extra1 = &insn->min; + sysctl->extra2 = &insn->max; + sysctl->proc_handler = emulation_proc_handler; + i++; + } + raw_spin_unlock_irqrestore(&insn_emulation_lock, flags); + + table->child = insns_sysctl; + register_sysctl_table(table); +} + +/* + * Invoked as late_initcall, since not needed before init spawned. + */ +static int __init armv8_deprecated_init(void) +{ + register_insn_emulation_sysctl(ctl_abi); + + return 0; +} + +late_initcall(armv8_deprecated_init); From bd35a4adc4131c530ec7d90242555eac7b3dbe3f Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:25 +0000 Subject: [PATCH 31/70] arm64: Port SWP/SWPB emulation support from arm The SWP instruction was deprecated in the ARMv6 architecture. The ARMv7 multiprocessing extensions mandate that SWP/SWPB instructions are treated as undefined from reset, with the ability to enable them through the System Control Register SW bit. With ARMv8, the option to enable these instructions through the System Control Register was dropped as well. To support legacy applications using these instructions, port the emulation of the SWP and SWPB instructions from the arm port to arm64. Reviewed-by: Catalin Marinas Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- Documentation/arm64/legacy_instructions.txt | 7 + arch/arm64/Kconfig | 21 +++ arch/arm64/include/asm/insn.h | 6 + arch/arm64/kernel/armv8_deprecated.c | 191 ++++++++++++++++++++ arch/arm64/kernel/insn.c | 8 + 5 files changed, 233 insertions(+) diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt index 49d4867c0c09..5ab58614b7ed 100644 --- a/Documentation/arm64/legacy_instructions.txt +++ b/Documentation/arm64/legacy_instructions.txt @@ -31,3 +31,10 @@ behaviours and the corresponding values of the sysctl nodes - The default mode depends on the status of the instruction in the architecture.
Deprecated instructions should default to emulation while obsolete instructions must be undefined by default. + +Supported legacy instructions +----------------------------- +* SWP{B} +Node: /proc/sys/abi/swp +Status: Obsolete +Default: Undef (0) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aa8f4bea3738..2b6213840ec8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -180,6 +180,27 @@ menuconfig ARMV8_DEPRECATED if ARMV8_DEPRECATED +config SWP_EMULATION + bool "Emulate SWP/SWPB instructions" + help + ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that + they are always undefined. Say Y here to enable software + emulation of these instructions for userspace using LDXR/STXR. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDXR/STXR rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y + endif endmenu diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1bb043018315..3ecc57c47c04 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -356,6 +356,12 @@ int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); bool aarch32_insn_is_wide(u32 insn); + +#define A32_RN_OFFSET 16 +#define A32_RT_OFFSET 12 +#define A32_RT2_OFFSET 0 + +u32 aarch32_insn_extract_reg_num(u32 insn, int offset); #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index db3b79da6a48..98865a7868f1 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -8,10 +8,16 @@ #include #include +#include +#include #include #include +#include +#include +#include #include +#include /* * The runtime support for deprecated instruction support can be in one of @@ -203,11 +209,196 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) register_sysctl_table(table); } +/* + * Implement emulation of the SWP/SWPB instructions using load-exclusive and + * store-exclusive. 
+ * + * Syntax of SWP{B} instruction: SWP{B} , , [] + * Where: Rt = destination + * Rt2 = source + * Rn = address + */ + +/* + * Error-checking SWP macros implemented using ldxr{b}/stxr{b} + */ +#define __user_swpX_asm(data, addr, res, temp, B) \ + __asm__ __volatile__( \ + " mov %w2, %w1\n" \ + "0: ldxr"B" %w1, [%3]\n" \ + "1: stxr"B" %w0, %w2, [%3]\n" \ + " cbz %w0, 2f\n" \ + " mov %w0, %w4\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w5\n" \ + " b 2b\n" \ + " .popsection" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .quad 0b, 3b\n" \ + " .quad 1b, 3b\n" \ + " .popsection" \ + : "=&r" (res), "+r" (data), "=&r" (temp) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "memory") + +#define __user_swp_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "") +#define __user_swpb_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "b") + +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +static void set_segfault(struct pt_regs *regs, unsigned long addr) +{ + siginfo_t info; + + down_read(¤t->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + info.si_code = SEGV_MAPERR; + else + info.si_code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_addr = (void *) instruction_pointer(regs); + + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm64_notify_die("Illegal memory access", regs, &info, 0); +} + +static int emulate_swpX(unsigned int address, unsigned int *data, + unsigned int type) +{ + unsigned int res = 0; + + if ((type != TYPE_SWPB) && (address & 0x3)) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } + + while (1) { + unsigned long temp; + + if (type == TYPE_SWPB) + __user_swpb_asm(*data, address, res, temp); + else + __user_swp_asm(*data, address, res, temp); + + if (likely(res != -EAGAIN) || signal_pending(current)) + break; + + cond_resched(); + } + + return res; +} + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, u32 instr) +{ + u32 destreg, data, type, address = 0; + int rn, rt2, res = 0; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + type = instr & TYPE_SWPB; + + switch (arm_check_condition(instr, regs->pstate)) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + goto ret; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a SWP, undef */ + return -EFAULT; + default: + return -EINVAL; + } + + rn = aarch32_insn_extract_reg_num(instr, A32_RN_OFFSET); + rt2 = aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET); + + address = (u32)regs->user_regs.regs[rn]; + data = (u32)regs->user_regs.regs[rt2]; + destreg = aarch32_insn_extract_reg_num(instr, A32_RT_OFFSET); + + pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", + rn, address, destreg, + aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data); + + /* Check access in reasonable access range for both SWP and SWPB */ + if 
(!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n", + address); + goto fault; + } + + res = emulate_swpX(address, &data, type); + if (res == -EFAULT) + goto fault; + else if (res == 0) + regs->user_regs.regs[destreg] = data; + +ret: + pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n", + current->comm, (unsigned long)current->pid, regs->pc); + + regs->pc += 4; + return 0; + +fault: + set_segfault(regs, address); + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb. + */ +static struct undef_hook swp_hooks[] = { + { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = swp_handler + }, + { } +}; + +static struct insn_emulation_ops swp_ops = { + .name = "swp", + .status = INSN_OBSOLETE, + .hooks = swp_hooks, + .set_hw_mode = NULL, +}; + /* * Invoked as late_initcall, since not needed before init spawned. */ static int __init armv8_deprecated_init(void) { + if (IS_ENABLED(CONFIG_SWP_EMULATION)) + register_insn_emulation(&swp_ops); + register_insn_emulation_sysctl(ctl_abi); return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index ab00eb58d385..63122dcd8524 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -964,3 +964,11 @@ bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; } + +/* + * Macros/defines for extracting register numbers from instruction. + */ +u32 aarch32_insn_extract_reg_num(u32 insn, int offset) +{ + return (insn & (0xf << offset)) >> offset; +} From c852f320584600a372646055d8229e063949eee7 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:26 +0000 Subject: [PATCH 32/70] arm64: Emulate CP15 Barrier instructions The CP15 barrier instructions (CP15ISB, CP15DSB and CP15DMB) are deprecated in the ARMv7 architecture, superseded by ISB, DSB and DMB instructions respectively. Some implementations may provide the ability to disable the CP15 barriers by disabling the CP15BEN bit in SCTLR_EL1. If not enabled, the encodings for these instructions become undefined. 
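For reference, the three deprecated encodings and the modern barriers they map to, as handled in the diff below (illustrative AArch32 assembly; the Rt register is conventionally r0):

    mcr p15, 0, r0, c7, c10, 5   @ CP15DMB -> dmb
    mcr p15, 0, r0, c7, c10, 4   @ CP15DSB -> dsb
    mcr p15, 0, r0, c7, c5, 4    @ CP15ISB -> isb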
To support legacy software using the CP15 barrier instructions, this patch registers hooks to - * emulate CP15 barriers and warn the user about their use * toggle CP15BEN in SCTLR_EL1 Signed-off-by: Punit Agrawal Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/arm64/legacy_instructions.txt | 5 + arch/arm64/Kconfig | 15 +++ arch/arm64/include/asm/insn.h | 2 + arch/arm64/kernel/armv8_deprecated.c | 131 ++++++++++++++++++++ arch/arm64/kernel/insn.c | 13 ++ 5 files changed, 166 insertions(+) diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt index 5ab58614b7ed..a3b3da2ec6ed 100644 --- a/Documentation/arm64/legacy_instructions.txt +++ b/Documentation/arm64/legacy_instructions.txt @@ -38,3 +38,8 @@ Supported legacy instructions Node: /proc/sys/abi/swp Status: Obsolete Default: Undef (0) + +* CP15 Barriers +Node: /proc/sys/abi/cp15_barrier +Status: Deprecated +Default: Emulate (1) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2b6213840ec8..8e74dfe1e74d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -201,6 +201,21 @@ config SWP_EMULATION If unsure, say Y +config CP15_BARRIER_EMULATION + bool "Emulate CP15 Barrier instructions" + help + The CP15 barrier instructions - CP15ISB, CP15DSB, and + CP15DMB - are deprecated in ARMv8 (and ARMv7). It is + strongly recommended to use the ISB, DSB, and DMB + instructions instead. + + Say Y here to enable software emulation of these + instructions for AArch32 userspace code. When this option is + enabled, CP15 barrier usage is traced which can help + identify software that needs updating. + + If unsure, say Y + endif endmenu diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 3ecc57c47c04..e2ff32a93b5c 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -362,6 +362,8 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT2_OFFSET 0 u32 aarch32_insn_extract_reg_num(u32 insn, int offset); +u32 aarch32_insn_mcr_extract_opc2(u32 insn); +u32 aarch32_insn_mcr_extract_crm(u32 insn); #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 98865a7868f1..401c2e544ede 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -391,6 +392,133 @@ static struct insn_emulation_ops swp_ops = { .set_hw_mode = NULL, }; +static int cp15barrier_handler(struct pt_regs *regs, u32 instr) +{ + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + switch (arm_check_condition(instr, regs->pstate)) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + goto ret; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a barrier instruction */ + return -EFAULT; + default: + return -EINVAL; + } + + switch (aarch32_insn_mcr_extract_crm(instr)) { + case 10: + /* + * dmb - mcr p15, 0, Rt, c7, c10, 5 + * dsb - mcr p15, 0, Rt, c7, c10, 4 + */ + if (aarch32_insn_mcr_extract_opc2(instr) == 5) + dmb(sy); + else + dsb(sy); + break; + case 5: + /* + * isb - mcr p15, 0, Rt, c7, c5, 4 + * + * Taking an exception or returning from one acts as an + * instruction barrier. So no explicit barrier needed here. 
+ */ + break; + } + +ret: + pr_warn_ratelimited("\"%s\" (%ld) uses deprecated CP15 Barrier instruction at 0x%llx\n", + current->comm, (unsigned long)current->pid, regs->pc); + + regs->pc += 4; + return 0; +} + +#define SCTLR_EL1_CP15BEN (1 << 5) + +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + asm volatile("mrs %0, sctlr_el1" : "=r" (val)); + val &= ~clear; + val |= set; + asm volatile("msr sctlr_el1, %0" : : "r" (val)); +} + +static void enable_cp15_ben(void *info) +{ + config_sctlr_el1(0, SCTLR_EL1_CP15BEN); +} + +static void disable_cp15_ben(void *info) +{ + config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); +} + +static int cpu_hotplug_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + enable_cp15_ben(NULL); + return NOTIFY_DONE; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_cp15_ben(NULL); + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block cpu_hotplug_notifier = { + .notifier_call = cpu_hotplug_notify, +}; + +static int cp15_barrier_set_hw_mode(bool enable) +{ + if (enable) { + register_cpu_notifier(&cpu_hotplug_notifier); + on_each_cpu(enable_cp15_ben, NULL, true); + } else { + unregister_cpu_notifier(&cpu_hotplug_notifier); + on_each_cpu(disable_cp15_ben, NULL, true); + } + + return true; +} + +static struct undef_hook cp15_barrier_hooks[] = { + { + .instr_mask = 0x0fff0fdf, + .instr_val = 0x0e070f9a, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = cp15barrier_handler, + }, + { + .instr_mask = 0x0fff0fff, + .instr_val = 0x0e070f95, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = cp15barrier_handler, + }, + { } +}; + +static struct insn_emulation_ops cp15_barrier_ops = { + .name = "cp15_barrier", + .status = INSN_DEPRECATED, + .hooks = cp15_barrier_hooks, + .set_hw_mode = cp15_barrier_set_hw_mode, +}; + /* * Invoked as late_initcall, since not needed before init spawned. */ @@ -399,6 +527,9 @@ static int __init armv8_deprecated_init(void) if (IS_ENABLED(CONFIG_SWP_EMULATION)) register_insn_emulation(&swp_ops); + if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION)) + register_insn_emulation(&cp15_barrier_ops); + register_insn_emulation_sysctl(ctl_abi); return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 63122dcd8524..819e409029ce 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -972,3 +972,16 @@ u32 aarch32_insn_extract_reg_num(u32 insn, int offset) { return (insn & (0xf << offset)) >> offset; } + +#define OPC2_MASK 0x7 +#define OPC2_OFFSET 5 +u32 aarch32_insn_mcr_extract_opc2(u32 insn) +{ + return (insn & (OPC2_MASK << OPC2_OFFSET)) >> OPC2_OFFSET; +} + +#define CRM_MASK 0xf +u32 aarch32_insn_mcr_extract_crm(u32 insn) +{ + return insn & CRM_MASK; +} From d784e2988a3e70a6f1047e80e01465a903ea2eba Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:27 +0000 Subject: [PATCH 33/70] arm64: Trace emulation of AArch32 legacy instructions Introduce an event to trace the usage of emulated instructions. The trace event is intended to help identify and encourage the migration of legacy software using the emulation features. Use this event to trace usage of swp and CP15 barrier emulation. 
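Stepping back to the SWP{B} emulation itself for a moment: everything rests on replaying the swap as a load-exclusive/store-exclusive pair until the store succeeds. A minimal user-space sketch of that retry pattern, assuming an AArch64 toolchain (the uaccess fixups and the -EAGAIN plumbing of the real macro are omitted):

#include <stdint.h>
#include <stdio.h>

static uint32_t swap32(uint32_t *addr, uint32_t newval)
{
	uint32_t old, fail;

	do {
		__asm__ __volatile__(
		"	ldxr	%w0, [%2]\n"		/* exclusive load of the old value */
		"	stxr	%w1, %w3, [%2]\n"	/* try to store the new value */
		: "=&r" (old), "=&r" (fail)
		: "r" (addr), "r" (newval)
		: "memory");
	} while (fail);	/* stxr writes 0 on success, 1 if exclusivity was lost */

	return old;
}

int main(void)
{
	uint32_t v = 1;
	uint32_t old = swap32(&v, 2);

	printf("old=%u new=%u\n", old, v);	/* old=1 new=2 */
	return 0;
}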
Acked-by: Steven Rostedt Acked-by: Catalin Marinas Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/armv8_deprecated.c | 19 ++++++++++-- arch/arm64/kernel/trace-events-emulation.h | 35 ++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kernel/trace-events-emulation.h diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 84e9e5153efe..b36ebd0aacbb 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -5,6 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 401c2e544ede..529aad93336f 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -20,6 +20,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace-events-emulation.h" + /* * The runtime support for deprecated instruction support can be in one of * following three states - @@ -358,6 +361,11 @@ static int swp_handler(struct pt_regs *regs, u32 instr) regs->user_regs.regs[destreg] = data; ret: + if (type == TYPE_SWPB) + trace_instruction_emulation("swpb", regs->pc); + else + trace_instruction_emulation("swp", regs->pc); + pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n", current->comm, (unsigned long)current->pid, regs->pc); @@ -415,10 +423,15 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) * dmb - mcr p15, 0, Rt, c7, c10, 5 * dsb - mcr p15, 0, Rt, c7, c10, 4 */ - if (aarch32_insn_mcr_extract_opc2(instr) == 5) + if (aarch32_insn_mcr_extract_opc2(instr) == 5) { dmb(sy); - else + trace_instruction_emulation( + "mcr p15, 0, Rt, c7, c10, 5 ; dmb", regs->pc); + } else { dsb(sy); + trace_instruction_emulation( + "mcr p15, 0, Rt, c7, c10, 4 ; dsb", regs->pc); + } break; case 5: /* @@ -427,6 +440,8 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) * Taking an exception or returning from one acts as an * instruction barrier. So no explicit barrier needed here. */ + trace_instruction_emulation( + "mcr p15, 0, Rt, c7, c5, 4 ; isb", regs->pc); break; } diff --git a/arch/arm64/kernel/trace-events-emulation.h b/arch/arm64/kernel/trace-events-emulation.h new file mode 100644 index 000000000000..ae1dd598ea65 --- /dev/null +++ b/arch/arm64/kernel/trace-events-emulation.h @@ -0,0 +1,35 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM emulation + +#if !defined(_TRACE_EMULATION_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EMULATION_H + +#include + +TRACE_EVENT(instruction_emulation, + + TP_PROTO(const char *instr, u64 addr), + TP_ARGS(instr, addr), + + TP_STRUCT__entry( + __string(instr, instr) + __field(u64, addr) + ), + + TP_fast_assign( + __assign_str(instr, instr); + __entry->addr = addr; + ), + + TP_printk("instr=\"%s\" addr=0x%llx", __get_str(instr), __entry->addr) +); + +#endif /* _TRACE_EMULATION_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . 
+ +#define TRACE_INCLUDE_FILE trace-events-emulation +#include From 1b907f46db07405b6676addb91b32c546d772fcd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 20 Nov 2014 16:51:10 +0000 Subject: [PATCH 34/70] arm64: kconfig: move emulation option under kernel features Having the instruction emulation submenu underneath "platform selection" is a great way to hide options we don't want people to use, but somewhat confusing when you stumble across it there. Move the menuconfig option underneath "kernel features", where it makes a bit more sense. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 106 ++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8e74dfe1e74d..2294be00f0ca 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -164,60 +164,6 @@ config ARCH_XGENE help This enables support for AppliedMicro X-Gene SOC Family -comment "Processor Features" - -menuconfig ARMV8_DEPRECATED - bool "Emulate deprecated/obsolete ARMv8 instructions" - depends on COMPAT - help - Legacy software support may require certain instructions - that have been deprecated or obsoleted in the architecture. - - Enable this config to enable selective emulation of these - features. - - If unsure, say Y - -if ARMV8_DEPRECATED - -config SWP_EMULATION - bool "Emulate SWP/SWPB instructions" - help - ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that - they are always undefined. Say Y here to enable software - emulation of these instructions for userspace using LDXR/STXR. - - In some older versions of glibc [<=2.8] SWP is used during futex - trylock() operations with the assumption that the code will not - be preempted. This invalid assumption may be more likely to fail - with SWP emulation enabled, leading to deadlock of the user - application. - - NOTE: when accessing uncached shared regions, LDXR/STXR rely - on an external transaction monitoring block called a global - monitor to maintain update atomicity. If your system does not - implement a global monitor, this option can cause programs that - perform SWP operations to uncached memory to deadlock. - - If unsure, say Y - -config CP15_BARRIER_EMULATION - bool "Emulate CP15 Barrier instructions" - help - The CP15 barrier instructions - CP15ISB, CP15DSB, and - CP15DMB - are deprecated in ARMv8 (and ARMv7). It is - strongly recommended to use the ISB, DSB, and DMB - instructions instead. - - Say Y here to enable software emulation of these - instructions for AArch32 userspace code. When this option is - enabled, CP15 barrier usage is traced which can help - identify software that needs updating. - - If unsure, say Y - -endif - endmenu menu "Bus support" @@ -417,6 +363,58 @@ config FORCE_MAX_ZONEORDER default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "11" +menuconfig ARMV8_DEPRECATED + bool "Emulate deprecated/obsolete ARMv8 instructions" + depends on COMPAT + help + Legacy software support may require certain instructions + that have been deprecated or obsoleted in the architecture. + + Enable this config to enable selective emulation of these + features. + + If unsure, say Y + +if ARMV8_DEPRECATED + +config SWP_EMULATION + bool "Emulate SWP/SWPB instructions" + help + ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that + they are always undefined. Say Y here to enable software + emulation of these instructions for userspace using LDXR/STXR. 
+ + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDXR/STXR rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y + +config CP15_BARRIER_EMULATION + bool "Emulate CP15 Barrier instructions" + help + The CP15 barrier instructions - CP15ISB, CP15DSB, and + CP15DMB - are deprecated in ARMv8 (and ARMv7). It is + strongly recommended to use the ISB, DSB, and DMB + instructions instead. + + Say Y here to enable software emulation of these + instructions for AArch32 userspace code. When this option is + enabled, CP15 barrier usage is traced which can help + identify software that needs updating. + + If unsure, say Y + +endif + endmenu menu "Boot options" From 7f73f7aef824b8bc27046edaf6b73bca4b0e7669 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Nov 2014 14:22:22 +0000 Subject: [PATCH 35/70] arm64: mm: report unhandled level-0 translation faults correctly Translation faults that occur due to the input address being outside of the address range mapped by the relevant base register are reported as level 0 faults in ESR.DFSC. If the faulting access cannot be resolved by the kernel (e.g. because it is not mapped by a vma), then we report "input address range fault" on the console. This was fine until we added support for 48-bit VAs, which actually place PGDs at level 0 and can trigger faults for invalid addresses that are within the range of the page tables. This patch changes the string to report "level 0 translation fault", which is far less confusing. Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 41cb6d3d6075..c11cd27ca8f5 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -380,7 +380,7 @@ static struct fault_info { { do_bad, SIGBUS, 0, "level 1 address size fault" }, { do_bad, SIGBUS, 0, "level 2 address size fault" }, { do_bad, SIGBUS, 0, "level 3 address size fault" }, - { do_translation_fault, SIGSEGV, SEGV_MAPERR, "input address range fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, From 909633957d85561dab7655d69a9d17dd16231d92 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Nov 2014 10:05:35 +0000 Subject: [PATCH 36/70] arm64: fix return code check when changing emulation handler update_insn_emulation_mode() returns 0 on success, so we should be treating any non-zero values as failure, rather than the other way around. Otherwise, writes to the sysctl file controlling the emulation are ignored and immediately rolled back. 
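The corrected shape is worth spelling out, since 0-on-success conventions invert easily: apply the new mode, and only on a non-zero return put the previous one back. A sketch with hypothetical names (apply_mode stands in for update_insn_emulation_mode; none of these identifiers are the kernel's):

#include <stdio.h>

static int apply_mode(int mode)
{
	return mode < 0 ? -1 : 0;	/* stand-in: rejects negative modes */
}

static int set_mode(int *current_mode, int new_mode)
{
	int prev = *current_mode;
	int ret;

	*current_mode = new_mode;
	ret = apply_mode(new_mode);
	if (ret)			/* non-zero means failure: revert */
		*current_mode = prev;
	return ret;
}

int main(void)
{
	int mode = 1;
	int ret = set_mode(&mode, -5);

	printf("ret=%d mode=%d\n", ret, mode);	/* ret=-1 mode=1 */
	return 0;
}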
Reported-by: Gene Hackmann Signed-off-by: Will Deacon --- arch/arm64/kernel/armv8_deprecated.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 529aad93336f..c363671d7509 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -165,7 +165,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write, goto ret; ret = update_insn_emulation_mode(insn, prev_mode); - if (!ret) { + if (ret) { /* Mode change failed, revert to previous mode. */ insn->current_mode = prev_mode; update_insn_emulation_mode(insn, INSN_UNDEF); From 930da09f5e50dd22fb0a8600388da8677d62d671 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:07 +0000 Subject: [PATCH 37/70] arm64: add cpu_capabilities bitmap To note whether at least one CPU in the system needs a bug workaround or would benefit from a code optimization, we create a new bitmap to hold (artificial) feature bits. Since elf_hwcap is part of the userland ABI, we leave it untouched and introduce a separate data structure for these bits (along with some accessors). Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 20 ++++++++++++++++++++ arch/arm64/kernel/setup.c | 3 +++ 2 files changed, 23 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index cd4ac0516488..20b2b3d6b702 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -21,9 +21,29 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) +#define NCAPS 0 + +extern DECLARE_BITMAP(cpu_hwcaps, NCAPS); + static inline bool cpu_have_feature(unsigned int num) { return elf_hwcap & (1UL << num); } +static inline bool cpus_have_cap(unsigned int num) +{ + if (num >= NCAPS) + return false; + return test_bit(num, cpu_hwcaps); +} + +static inline void cpus_set_cap(unsigned int num) +{ + if (num >= NCAPS) + pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", + num, NCAPS); + else + __set_bit(num, cpu_hwcaps); +} + #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2e17bd3806c8..c8629eb07ba6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,8 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif +DECLARE_BITMAP(cpu_hwcaps, NCAPS); + static const char *cpu_name; phys_addr_t __fdt_pointer __initdata; From e039ee4ee3fcf174736f2cb0a2eed6cb908348a6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:08 +0000 Subject: [PATCH 38/70] arm64: add alternative runtime patching With a blatant copy of some x86 bits we introduce the alternative runtime patching "framework" to arm64. This is quite basic for now and we only provide the functions we need at this time. This is connected to the newly introduced feature bits. 
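Those feature bits are the cpu_hwcaps bitmap from the previous patch. Its shape in plain C, with a single 64-bit word standing in for DECLARE_BITMAP and an illustrative NCAPS (the real header starts at NCAPS 0 and grows it as capabilities are defined):

#include <stdint.h>
#include <stdio.h>

#define NCAPS 2		/* illustrative; grows as capabilities are added */

static uint64_t cpu_hwcaps;

static void cpus_set_cap(unsigned int num)
{
	if (num >= NCAPS)
		fprintf(stderr, "illegal capability %u >= %u\n", num, NCAPS);
	else
		cpu_hwcaps |= 1ULL << num;
}

static int cpus_have_cap(unsigned int num)
{
	return num < NCAPS && (cpu_hwcaps & (1ULL << num)) != 0;
}

int main(void)
{
	cpus_set_cap(1);
	printf("cap0=%d cap1=%d\n", cpus_have_cap(0), cpus_have_cap(1)); /* 0 1 */
	return 0;
}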
Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative-asm.h | 16 ++++++ arch/arm64/include/asm/alternative.h | 43 ++++++++++++++++ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/alternative.c | 64 ++++++++++++++++++++++++ arch/arm64/kernel/smp.c | 2 + arch/arm64/kernel/vmlinux.lds.S | 11 ++++ arch/arm64/mm/init.c | 2 + 7 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/alternative-asm.h create mode 100644 arch/arm64/include/asm/alternative.h create mode 100644 arch/arm64/kernel/alternative.c diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h new file mode 100644 index 000000000000..5ee9340459b8 --- /dev/null +++ b/arch/arm64/include/asm/alternative-asm.h @@ -0,0 +1,16 @@ +#ifndef __ASM_ALTERNATIVE_ASM_H +#define __ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len + .word \orig_offset - . + .word \alt_offset - . + .hword \feature + .byte \orig_len + .byte \alt_len +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h new file mode 100644 index 000000000000..f6d206e7f9e9 --- /dev/null +++ b/arch/arm64/include/asm/alternative.h @@ -0,0 +1,43 @@ +#ifndef __ASM_ALTERNATIVE_H +#define __ASM_ALTERNATIVE_H + +#include +#include +#include + +struct alt_instr { + s32 orig_offset; /* offset to original instruction */ + s32 alt_offset; /* offset to replacement instruction */ + u16 cpufeature; /* cpufeature bit set for replacement */ + u8 orig_len; /* size of original instruction(s) */ + u8 alt_len; /* size of new instruction(s), <= orig_len */ +}; + +void apply_alternatives(void); +void free_alternatives_memory(void); + +#define ALTINSTR_ENTRY(feature) \ + " .word 661b - .\n" /* label */ \ + " .word 663f - .\n" /* new instruction */ \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"a\"\n" \ + "663:\n\t" \ + newinstr "\n" \ + "664:\n\t" \ + ".popsection\n\t" \ + ".if ((664b-663b) != (662b-661b))\n\t" \ + " .error \"Alternatives instruction length mismatch\"\n\t"\ + ".endif\n" + +#endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index b36ebd0aacbb..591a65dc5c9b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -16,7 +16,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ - cpuinfo.o + cpuinfo.o alternative.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c new file mode 100644 index 000000000000..1a3badab800a --- /dev/null +++ b/arch/arm64/kernel/alternative.c @@ -0,0 +1,64 @@ +/* + * alternative runtime patching + * inspired by the x86 version + * + * Copyright (C) 2014 ARM Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define pr_fmt(fmt) "alternatives: " fmt + +#include +#include +#include +#include +#include +#include + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +static int __apply_alternatives(void *dummy) +{ + struct alt_instr *alt; + u8 *origptr, *replptr; + + for (alt = __alt_instructions; alt < __alt_instructions_end; alt++) { + if (!cpus_have_cap(alt->cpufeature)) + continue; + + BUG_ON(alt->alt_len > alt->orig_len); + + pr_info_once("patching kernel code\n"); + + origptr = (u8 *)&alt->orig_offset + alt->orig_offset; + replptr = (u8 *)&alt->alt_offset + alt->alt_offset; + memcpy(origptr, replptr, alt->alt_len); + flush_icache_range((uintptr_t)origptr, + (uintptr_t)(origptr + alt->alt_len)); + } + + return 0; +} + +void apply_alternatives(void) +{ + /* better not try code patching on a live SMP system */ + stop_machine(__apply_alternatives, NULL, NULL); +} + +void free_alternatives_memory(void) +{ + free_reserved_area(__alt_instructions, __alt_instructions_end, + 0, "alternatives"); +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b06d1d90ee8c..0ef87896e4ae 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -309,6 +310,7 @@ void cpu_die(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); + apply_alternatives(); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4596f46d0244..3236727be2b9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -116,6 +116,17 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { + *(.altinstr_replacement) + } + + . = ALIGN(PAGE_SIZE); _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 494297c698ca..bac492c12fcc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "mm.h" @@ -325,6 +326,7 @@ void __init mem_init(void) void free_initmem(void) { free_initmem_default(0); + free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD From e116a375423393cdb94714e90a96857005d58428 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:09 +0000 Subject: [PATCH 39/70] arm64: detect silicon revisions and set cap bits accordingly After each CPU has been started, we iterate through a list of CPU features or bugs to detect CPUs which need (or could benefit from) kernel code patches. For each feature/bug there is a function which checks if that particular CPU is affected. We will later provide some more generic functions for common things like testing for certain MIDR ranges. 
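Before the detection logic, one detail of the patching core above deserves a note: alt_instr stores self-relative s32 offsets rather than pointers, i.e. each field holds "target minus the field's own address", which is what the (u8 *)&alt->orig_offset + alt->orig_offset arithmetic undoes and why the table needs no relocation. A user-space sketch of the idiom (the pointer difference between distinct objects is technically implementation-defined in C, but it is exactly what the kernel relies on):

#include <stdint.h>
#include <stdio.h>

struct rel_ref {
	int32_t offset;			/* target - &offset, as in alt_instr */
};

static int target = 42;
static struct rel_ref ref;

static void *rel_deref(struct rel_ref *r)
{
	return (uint8_t *)&r->offset + r->offset;	/* recover the pointer */
}

int main(void)
{
	ref.offset = (int32_t)((uint8_t *)&target - (uint8_t *)&ref.offset);
	printf("%d\n", *(int *)rel_deref(&ref));	/* 42 */
	return 0;
}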
We do this for every CPU to cover big.LITTLE systems properly as well. If a certain feature/bug has been detected, the capability bit will be set, so that later the call to apply_alternatives() will trigger the actual code patching. Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 + arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/cpu_errata.c | 59 +++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 3 ++ 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kernel/cpu_errata.c diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 20b2b3d6b702..744eaf7fab0f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -46,4 +46,6 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } +void check_local_cpu_errata(void); + #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 591a65dc5c9b..eaa77ed7766a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -16,7 +16,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ - cpuinfo.o alternative.o + cpuinfo.o cpu_errata.o alternative.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o \ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c new file mode 100644 index 000000000000..9332cf7c0826 --- /dev/null +++ b/arch/arm64/kernel/cpu_errata.c @@ -0,0 +1,59 @@ +/* + * Contains CPU specific errata definitions + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define pr_fmt(fmt) "alternative: " fmt + +#include +#include +#include +#include + +/* + * Add a struct or another datatype to the union below if you need + * different means to detect an affected CPU. 
+ */ +struct arm64_cpu_capabilities { + const char *desc; + u16 capability; + bool (*is_affected)(struct arm64_cpu_capabilities *); + union { + struct { + u32 midr_model; + u32 midr_range_min, midr_range_max; + }; + }; +}; + +struct arm64_cpu_capabilities arm64_errata[] = { + {} +}; + +void check_local_cpu_errata(void) +{ + struct arm64_cpu_capabilities *cpus = arm64_errata; + int i; + + for (i = 0; cpus[i].desc; i++) { + if (!cpus[i].is_affected(&cpus[i])) + continue; + + if (!cpus_have_cap(cpus[i].capability)) + pr_info("enabling workaround for %s\n", cpus[i].desc); + cpus_set_cap(cpus[i].capability); + } +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 504fdaa8367e..16d6d032ecf1 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); cpuinfo_detect_icache_policy(info); + + check_local_cpu_errata(); } void cpuinfo_store_cpu(void) From 301bcfac42897dbd1b0b3c1be49f24654a1bc49e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:10 +0000 Subject: [PATCH 40/70] arm64: add Cortex-A53 cache errata workaround The ARM errata 819472, 826319, 827319 and 824069 define the same workaround for these hardware issues in certain Cortex-A53 parts. Use the new alternatives framework and the CPU MIDR detection to patch "cache clean" into "cache clean and invalidate" instructions if an affected CPU is detected at runtime. Signed-off-by: Andre Przywara [will: add __maybe_unused to squash gcc warning] Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative-asm.h | 13 ++++++++++ arch/arm64/include/asm/cpufeature.h | 8 +++++- arch/arm64/include/asm/cputype.h | 5 ++++ arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++++- arch/arm64/mm/cache.S | 4 ++- 5 files changed, 60 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h index 5ee9340459b8..919a67855b63 100644 --- a/arch/arm64/include/asm/alternative-asm.h +++ b/arch/arm64/include/asm/alternative-asm.h @@ -11,6 +11,19 @@ .byte \alt_len .endm +.macro alternative_insn insn1 insn2 cap +661: \insn1 +662: .pushsection .altinstructions, "a" + altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f + .popsection + .pushsection .altinstr_replacement, "ax" +663: \insn2 +664: .popsection + .if ((664b-663b) != (662b-661b)) + .error "Alternatives instruction length mismatch" + .endif +.endm + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 744eaf7fab0f..92b6ee44669b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -21,7 +21,11 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) -#define NCAPS 0 +#define ARM64_WORKAROUND_CLEAN_CACHE 0 + +#define NCAPS 1 + +#ifndef __ASSEMBLY__ extern DECLARE_BITMAP(cpu_hwcaps, NCAPS); @@ -48,4 +52,6 @@ static inline void cpus_set_cap(unsigned int num) void check_local_cpu_errata(void); +#endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 379d0b874328..8adb986a3086 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -57,6 +57,11 @@ #define MIDR_IMPLEMENTOR(midr) \ (((midr) & 
MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_CPU_PART(imp, partnum) \ + (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + (0xf << MIDR_ARCHITECTURE_SHIFT) | \ + ((partnum) << MIDR_PARTNUM_SHIFT)) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9332cf7c0826..e107ed2d66dc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -23,6 +23,8 @@ #include #include +#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + /* * Add a struct or another datatype to the union below if you need * different means to detect an affected CPU. @@ -39,8 +41,37 @@ struct arm64_cpu_capabilities { }; }; +#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +static bool __maybe_unused +is_affected_midr_range(struct arm64_cpu_capabilities *entry) +{ + u32 midr = read_cpuid_id(); + + if ((midr & CPU_MODEL_MASK) != entry->midr_model) + return false; + + midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; + + return (midr >= entry->midr_range_min && midr <= entry->midr_range_max); +} + +#define MIDR_RANGE(model, min, max) \ + .is_affected = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = min, \ + .midr_range_max = max + struct arm64_cpu_capabilities arm64_errata[] = { - {} + { + /* Cortex-A53 r0p[012] */ + .desc = "ARM errata 826319, 827319, 824069", + .capability = ARM64_WORKAROUND_CLEAN_CACHE, + MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + }, + { + } }; void check_local_cpu_errata(void) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 23663837acff..8eaf18577d71 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "proc-macros.S" @@ -210,7 +212,7 @@ __dma_clean_range: dcache_line_size x2, x3 sub x3, x2, #1 bic x0, x0, x3 -1: dc cvac, x0 // clean D / U line +1: alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE add x0, x0, x2 cmp x0, x1 b.lo 1b From 5afaa1fc1b320cec48affa7e6949f2493f875c12 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:11 +0000 Subject: [PATCH 41/70] arm64: add Cortex-A57 erratum 832075 workaround The ARM erratum 832075 applies to certain revisions of Cortex-A57; one of the workarounds is to change device loads to use load-acquire semantics. This is achieved using the alternatives framework. 
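Both of these errata entries hinge on decoding MIDR_EL1, whose fields (per the ARM ARM) are implementer [31:24], variant [23:20], architecture [19:16], part number [15:4] and revision [3:0]; the familiar rNpM naming is just variant/revision. A small decode sketch, using the well-known Cortex-A53 r0p2 value as input:

#include <stdint.h>
#include <stdio.h>

static void midr_decode(uint32_t midr)
{
	printf("implementer %#x part %#x r%up%u\n",
	       midr >> 24,		/* implementer, bits [31:24] */
	       (midr >> 4) & 0xfff,	/* part number, bits [15:4]  */
	       (midr >> 20) & 0xf,	/* variant: the N in rNpM    */
	       midr & 0xf);		/* revision: the M in rNpM   */
}

int main(void)
{
	midr_decode(0x410fd032);	/* ARM implementer, part 0xd03: Cortex-A53 r0p2 */
	return 0;
}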
Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 5 +++-- arch/arm64/include/asm/io.h | 23 +++++++++++++++++++---- arch/arm64/kernel/cpu_errata.c | 7 +++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92b6ee44669b..0362f8020d46 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -21,9 +21,10 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) -#define ARM64_WORKAROUND_CLEAN_CACHE 0 +#define ARM64_WORKAROUND_CLEAN_CACHE 0 +#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define NCAPS 1 +#define NCAPS 2 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 79f1d519221f..75825b63464d 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include @@ -57,28 +59,41 @@ static inline void __raw_writeq(u64 val, volatile void __iomem *addr) static inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; - asm volatile("ldrb %w0, [%1]" : "=r" (val) : "r" (addr)); + asm volatile(ALTERNATIVE("ldrb %w0, [%1]", + "ldarb %w0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } static inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; - asm volatile("ldrh %w0, [%1]" : "=r" (val) : "r" (addr)); + + asm volatile(ALTERNATIVE("ldrh %w0, [%1]", + "ldarh %w0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } static inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; - asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr)); + asm volatile(ALTERNATIVE("ldr %w0, [%1]", + "ldar %w0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } static inline u64 __raw_readq(const volatile void __iomem *addr) { u64 val; - asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + asm volatile(ALTERNATIVE("ldr %0, [%1]", + "ldar %0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e107ed2d66dc..30935d2da55a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -24,6 +24,7 @@ #include #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) /* * Add a struct or another datatype to the union below if you need @@ -71,6 +72,12 @@ struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), }, { + /* Cortex-A57 r0p0 - r1p2 */ + .desc = "ARM erratum 832075", + .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12), + }, + { } }; From c0a01b84b1fdbd98bff5bca5b201fe73fda7e9d9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:12 +0000 Subject: [PATCH 42/70] arm64: protect alternatives workarounds with Kconfig options Not all of the errata we have workarounds for apply necessarily to all SoCs, so people compiling a kernel for one very specific SoC may not need to patch the kernel. Introduce a new submenu in the "Platform selection" menu to allow people to turn off certain bugs if they are not affected. By default all of them are enabled. 
Normal users or distribution kernels shouldn't bother to deselect any bugs here, since the alternatives framework will take care of patching them in only if needed. Signed-off-by: Andre Przywara [will: moved kconfig menu under `Kernel Features'] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 108 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 14 +++++ 2 files changed, 122 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2294be00f0ca..f5412d628ff6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -195,6 +195,114 @@ endmenu menu "Kernel Features" +menu "ARM errata workarounds via the alternatives framework" + +config ARM64_ERRATUM_826319 + bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted" + default y + help + This option adds an alternative code sequence to work around ARM + erratum 826319 on Cortex-A53 parts up to r0p2 with an AMBA 4 ACE or + AXI master interface and an L2 cache. + + If a Cortex-A53 uses an AMBA AXI4 ACE interface to other processors + and is unable to accept a certain write via this interface, it will + not progress on read data presented on the read data channel and the + system can deadlock. + + The workaround promotes data cache clean instructions to + data cache clean-and-invalidate. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + +config ARM64_ERRATUM_827319 + bool "Cortex-A53: 827319: Data cache clean instructions might cause overlapping transactions to the interconnect" + default y + help + This option adds an alternative code sequence to work around ARM + erratum 827319 on Cortex-A53 parts up to r0p2 with an AMBA 5 CHI + master interface and an L2 cache. + + Under certain conditions this erratum can cause a clean line eviction + to occur at the same time as another transaction to the same address + on the AMBA 5 CHI interface, which can cause data corruption if the + interconnect reorders the two transactions. + + The workaround promotes data cache clean instructions to + data cache clean-and-invalidate. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + +config ARM64_ERRATUM_824069 + bool "Cortex-A53: 824069: Cache line might not be marked as clean after a CleanShared snoop" + default y + help + This option adds an alternative code sequence to work around ARM + erratum 824069 on Cortex-A53 parts up to r0p2 when it is connected + to a coherent interconnect. + + If a Cortex-A53 processor is executing a store or prefetch for + write instruction at the same time as a processor in another + cluster is executing a cache maintenance operation to the same + address, then this erratum might cause a clean cache line to be + incorrectly marked as dirty. + + The workaround promotes data cache clean instructions to + data cache clean-and-invalidate. + Please note that this option does not necessarily enable the + workaround, as it depends on the alternative framework, which will + only patch the kernel if an affected CPU is detected. + + If unsure, say Y. 
+ +config ARM64_ERRATUM_819472 + bool "Cortex-A53: 819472: Store exclusive instructions might cause data corruption" + default y + help + This option adds an alternative code sequence to work around ARM + erratum 819472 on Cortex-A53 parts up to r0p1 with an L2 cache + present when it is connected to a coherent interconnect. + + If the processor is executing a load and store exclusive sequence at + the same time as a processor in another cluster is executing a cache + maintenance operation to the same address, then this erratum might + cause data corruption. + + The workaround promotes data cache clean instructions to + data cache clean-and-invalidate. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + +config ARM64_ERRATUM_832075 + bool "Cortex-A57: 832075: possible deadlock on mixing exclusive memory accesses with device loads" + default y + help + This option adds an alternative code sequence to work around ARM + erratum 832075 on Cortex-A57 parts up to r1p2. + + Affected Cortex-A57 parts might deadlock when exclusive load/store + instructions to Write-Back memory are mixed with Device loads. + + The workaround is to promote device loads to use Load-Acquire + semantics. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + +endmenu + + choice prompt "Page size" default ARM64_4K_PAGES diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 30935d2da55a..5a5226ffcbc8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -65,18 +65,32 @@ is_affected_midr_range(struct arm64_cpu_capabilities *entry) .midr_range_max = max struct arm64_cpu_capabilities arm64_errata[] = { +#if defined(CONFIG_ARM64_ERRATUM_826319) || \ + defined(CONFIG_ARM64_ERRATUM_827319) || \ + defined(CONFIG_ARM64_ERRATUM_824069) { /* Cortex-A53 r0p[012] */ .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_819472 + { + /* Cortex-A53 r0p[01] */ + .desc = "ARM errata 819472", + .capability = ARM64_WORKAROUND_CLEAN_CACHE, + MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_832075 { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12), }, +#endif { } }; From 1cefdaea613bcd247b2235079fcbb0ca4687542d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 21 Nov 2014 00:36:49 +0000 Subject: [PATCH 43/70] arm64: topology: Fix handling of multi-level cluster MPIDR-based detection The only requirement the scheduler has on cluster IDs is that they must be unique. When enumerating the topology based on MPIDR information the kernel currently generates cluster IDs by using the first level of affinity above the core ID (either level one or two depending on if the core has multiple threads) however the ARMv8 architecture allows for up to three levels of affinity. This means that an ARMv8 system may contain cores which have MPIDRs identical other than affinity level three which with current code will cause us to report multiple cores with the same identification to the scheduler in violation of its uniqueness requirement. 
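Concretely, the fix packs the higher affinity fields into the cluster ID so that two cores differing only at affinity level three stay distinct. A sketch of that packing (MPIDR affinity fields sit at Aff0 [7:0], Aff1 [15:8], Aff2 [23:16] and Aff3 [39:32]):

#include <stdint.h>
#include <stdio.h>

/* affinity level n of an MPIDR value; Aff3 lives up at bits [39:32] */
static uint32_t aff(uint64_t mpidr, int level)
{
	return (mpidr >> (level == 3 ? 32 : level * 8)) & 0xff;
}

/* cluster ID for the single-thread-per-core case, as in the patch */
static uint32_t cluster_id(uint64_t mpidr)
{
	return aff(mpidr, 1) | aff(mpidr, 2) << 8 | aff(mpidr, 3) << 16;
}

int main(void)
{
	/* identical below Aff3, yet distinct cluster IDs after the fix */
	printf("%#x vs %#x\n",
	       cluster_id(0x0000000000000100ULL),
	       cluster_id(0x0000004200000100ULL));	/* 0x1 vs 0x420001 */
	return 0;
}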
Ensure that we do not violate the scheduler requirements on systems that use all the affinity levels by incorporating both affinity levels two and three into the cluster ID when the cores are not threaded. While no currently known hardware uses multi-level clusters, it is better to program defensively; this will help ease bringup of systems that have them and will ensure that things like distribution install media do not need to be respun to replace kernels in order to deploy such systems. In the worst case the system will work but perform suboptimally until a kernel modified to handle the new topology better is installed; in the best case this will be an adequate description of such topologies for the scheduler to perform well. Signed-off-by: Mark Brown Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index b6ee26b0939a..fcb8f7b42271 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -255,12 +255,15 @@ void store_cpu_topology(unsigned int cpuid) /* Multiprocessor system : Multi-threads per core */ cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | + MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; } else { /* Multiprocessor system : Single-thread per core */ cpuid_topo->thread_id = -1; cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; } pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", From 9760270c36a4d2ac640ea6294f4f8634a8b27121 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 25 Nov 2014 13:27:41 +0000 Subject: [PATCH 44/70] arm64: sanity checks: ignore ID_MMFR0.AuxReg It seems that Cortex-A53 r0p4 added support for AIFSR and ADFSR, and ID_MMFR0.AuxReg has been updated accordingly to report this fact. As Cortex-A53 could be paired with CPUs which do not implement these registers (e.g. all current revisions of Cortex-A57), this may trigger a sanity check failure at boot. The AuxReg value describes the availability of the ACTLR, AIFSR, and ADFSR registers, which are only of use to 32-bit guest OSs, and have IMPLEMENTATION DEFINED contents. Given the nature of these registers it is likely that KVM will need to trap accesses regardless of whether the CPUs are heterogeneous. This patch masks out the ID_MMFR0.AuxReg value from the sanity checks, preventing spurious warnings at boot time. 
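The masked comparison is a plain XOR-under-mask; 0xff0fffff zeroes bits [23:20] of ID_MMFR0, which is precisely the AuxReg field. A sketch (the register values here are invented for illustration):

#include <stdint.h>
#include <stdio.h>

/* non-zero iff boot and cur differ within the masked bits */
static int check_mask(uint32_t boot, uint32_t cur, uint32_t mask)
{
	return ((boot ^ cur) & mask) != 0;
}

int main(void)
{
	uint32_t boot = 0x00101105;		/* hypothetical ID_MMFR0 */
	uint32_t cur  = boot | (1u << 20);	/* differs only in AuxReg */

	printf("full=%d masked=%d\n",
	       check_mask(boot, cur, 0xffffffffu),	/* 1: would taint    */
	       check_mask(boot, cur, 0xff0fffffu));	/* 0: AuxReg ignored */
	return 0;
}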
Signed-off-by: Mark Rutland Reported-by: Andre Przywara Cc: Catalin Marinas Acked-by: Will Deacon Cc: Marc Zyngier Cc: Peter Maydell Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuinfo.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 16d6d032ecf1..01994d4ce0c2 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -144,7 +144,12 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) diff |= CHECK(id_isar3, boot, cur, cpu); diff |= CHECK(id_isar4, boot, cur, cpu); diff |= CHECK(id_isar5, boot, cur, cpu); - diff |= CHECK(id_mmfr0, boot, cur, cpu); + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu); diff |= CHECK(id_mmfr1, boot, cur, cpu); diff |= CHECK(id_mmfr2, boot, cur, cpu); diff |= CHECK(id_mmfr3, boot, cur, cpu); From efdf4211d5b103535ae22972acadf57c9fc38b30 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 25 Nov 2014 13:27:42 +0000 Subject: [PATCH 45/70] arm64: sanity checks: add missing newline to print A missing newline in the WARN_TAINT_ONCE string results in ugly and somewhat difficult to read output in the case of a sanity check failure, as the next print does not appear on a new line: Unsupported CPU feature variation.Modules linked in: This patch adds the missing newline, fixing the output formatting. Signed-off-by: Mark Rutland Cc: Catalin Marinas Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 01994d4ce0c2..9da33f96b678 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -161,7 +161,7 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) * pretend to support them. */ WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation."); + "Unsupported CPU feature variation.\n"); } static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) From 3eebdbe5fc7d64c7a6ef14cc5b8be518ffd563fa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 25 Nov 2014 13:27:43 +0000 Subject: [PATCH 46/70] arm64: sanity checks: add ID_AA64DFR{0,1}_EL1 While we currently expect self-hosted debug support to be identical across CPUs, we don't currently sanity check this. This patch adds logging of the ID_AA64DFR{0,1}_EL1 values and associated sanity checking code. It's not clear to me whether we need to check PMUVer, TraceVer, and DebugVer, as we don't currently rely on these fields at all. 
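For context on what these new checks compare: within ID_AA64DFR0_EL1 the fields named in the comment sit at BRPs [15:12], WRPs [23:20] and CTX_CMPs [31:28], each encoding one less than the number of implemented breakpoints, watchpoints and context-matching comparators. A decode sketch (the sample value is plausible for a Cortex-A53 but used purely for illustration):

#include <stdint.h>
#include <stdio.h>

static unsigned int field4(uint64_t reg, int shift)
{
	return (reg >> shift) & 0xf;
}

int main(void)
{
	uint64_t dfr0 = 0x10305106ULL;	/* sample ID_AA64DFR0_EL1 value */

	/* each field is count-minus-one */
	printf("%u breakpoints, %u watchpoints, %u context comparators\n",
	       field4(dfr0, 12) + 1, field4(dfr0, 20) + 1, field4(dfr0, 28) + 1);
	return 0;
}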
Signed-off-by: Mark Rutland Cc: Catalin Marinas Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 2 ++ arch/arm64/kernel/cpuinfo.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 056443086019..ace70682499b 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -30,6 +30,8 @@ struct cpuinfo_arm64 { u32 reg_dczid; u32 reg_midr; + u64 reg_id_aa64dfr0; + u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; u64 reg_id_aa64mmfr0; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 9da33f96b678..57b641747534 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -111,6 +111,15 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) /* If different, timekeeping will be broken (especially with KVM) */ diff |= CHECK(cntfrq, boot, cur, cpu); + /* + * The kernel uses self-hosted debug features and expects CPUs to + * support identical debug features. We presently need CTX_CMPs, WRPs, + * and BRPs to be identical. + * ID_AA64DFR1 is currently RES0. + */ + diff |= CHECK(id_aa64dfr0, boot, cur, cpu); + diff |= CHECK(id_aa64dfr1, boot, cur, cpu); + /* * Even in big.LITTLE, processors should be identical instruction-set * wise. @@ -171,6 +180,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); + info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); + info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); From fcff588633e848aa728a4437ef96d437299ba03d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 21:50:38 +0000 Subject: [PATCH 47/70] arm64: Treat handle_arch_irq as a function pointer handle_arch_irq isn't actually text; it's just a function pointer. It doesn't need to be stored in the text section, and doing so causes problems if we ever want to make the kernel text read only. Declare handle_arch_irq as a proper function pointer stored in the data section. Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Acked-by: Ard Biesheuvel Tested-by: Mark Rutland Tested-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/irq.h | 1 - arch/arm64/kernel/entry.S | 6 ++---- arch/arm64/kernel/irq.c | 2 ++ 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index e1f7ecdde11f..1eebf5bb0b58 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -3,7 +3,6 @@ #include -extern void (*handle_arch_irq)(struct pt_regs *); extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 622a409916f3..99c8d13fc00d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -167,7 +167,8 @@ tsk .req x28 // current thread_info * Interrupt handling. 
*/ .macro irq_handler - ldr x1, handle_arch_irq + adrp x1, handle_arch_irq + ldr x1, [x1, #:lo12:handle_arch_irq] mov x0, sp blr x1 .endm @@ -699,6 +700,3 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) - -ENTRY(handle_arch_irq) - .quad 0 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 071a6ec13bd8..240b75c0e94f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } +void (*handle_arch_irq)(struct pt_regs *) = NULL; + void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq) From ac2dec5f6c27a581f8571da605d9ba04df18330d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 21:50:39 +0000 Subject: [PATCH 48/70] arm64: Switch to adrp for loading the stub vectors The hyp stub vectors are currently loaded using adr. This instruction has a +/- 1MB range for the loading address. If the alignment for sections is changed, the address may be more than 1MB away, resulting in relocation errors. Switch to using adrp for getting the address to ensure we aren't affected by the location of __hyp_stub_vectors. Acked-by: Ard Biesheuvel Acked-by: Marc Zyngier Tested-by: Mark Rutland Tested-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 5a76e3ab788c..fddda8565a52 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -341,7 +341,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr vttbr_el2, xzr /* Hypervisor stub */ - adr x0, __hyp_stub_vectors + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors msr vbar_el2, x0 /* spsr */ From c3684fbb446501b48dec6677a6a9f61c215053de Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 21:50:40 +0000 Subject: [PATCH 49/70] arm64: Move cpu_resume into the text section The function cpu_resume currently lives in the .data section. There's no reason for it to be there since we can use relative instructions without a problem. Move a few cpu_resume data structures out of the assembly file so the .data annotation can be dropped completely and cpu_resume ends up in the read only text section. 
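All three patches lean on the same addressing idiom: adrp materialises the 4 KB page address of a symbol with a +/-4 GB reach, and a following add (or a #:lo12: offset folded into a load) supplies the low 12 bits; this is also why an earlier patch pinned the PE/COFF SectionAlignment to 4 KB. A user-space illustration, assuming an AArch64 toolchain and that the compiler keeps the file-scope symbol name (as GCC and Clang do for this object):

#include <stdio.h>

static long marker __attribute__((used)) = 123;

int main(void)
{
	long *p;

	/* page address first, then the low 12 bits */
	__asm__("adrp	%0, marker\n\t"
		"add	%0, %0, :lo12:marker"
		: "=r" (p));

	printf("%ld\n", *p);	/* 123 */
	return 0;
}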
Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Reviewed-by: Lorenzo Pieralisi Tested-by: Mark Rutland Tested-by: Lorenzo Pieralisi Tested-by: Kees Cook Acked-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/kernel/sleep.S | 36 ++++++------------------------------ arch/arm64/kernel/suspend.c | 4 ++-- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index a564b440416a..ede186cdd452 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -147,14 +147,12 @@ cpu_resume_after_mmu: ret ENDPROC(cpu_resume_after_mmu) - .data ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly #ifdef CONFIG_SMP mrs x1, mpidr_el1 - adr x4, mpidr_hash_ptr - ldr x5, [x4] - add x8, x4, x5 // x8 = struct mpidr_hash phys address + adrp x8, mpidr_hash + add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address /* retrieve mpidr_hash members to compute the hash */ ldr x2, [x8, #MPIDR_HASH_MASK] ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] @@ -164,14 +162,15 @@ ENTRY(cpu_resume) #else mov x7, xzr #endif - adr x0, sleep_save_sp + adrp x0, sleep_save_sp + add x0, x0, #:lo12:sleep_save_sp ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] ldr x0, [x0, x7, lsl #3] /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - adr x1, sleep_idmap_phys + adrp x1, sleep_idmap_phys /* load physical address of identity map page table in x1 */ - ldr x1, [x1] + ldr x1, [x1, #:lo12:sleep_idmap_phys] mov sp, x2 /* * cpu_do_resume expects x0 to contain context physical address @@ -180,26 +179,3 @@ ENTRY(cpu_resume) bl cpu_do_resume // PC relative jump, MMU off b cpu_resume_mmu // Resume MMU, never returns ENDPROC(cpu_resume) - - .align 3 -mpidr_hash_ptr: - /* - * offset of mpidr_hash symbol from current location - * used to obtain run-time mpidr_hash address with MMU off - */ - .quad mpidr_hash - . -/* - * physical address of identity mapped page tables - */ - .type sleep_idmap_phys, #object -ENTRY(sleep_idmap_phys) - .quad 0 -/* - * struct sleep_save_sp { - * phys_addr_t *save_ptr_stash; - * phys_addr_t save_ptr_stash_phys; - * }; - */ - .type sleep_save_sp, #object -ENTRY(sleep_save_sp) - .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 13ad4dbb1615..3771b72b6569 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -126,8 +126,8 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -extern struct sleep_save_sp sleep_save_sp; -extern phys_addr_t sleep_idmap_phys; +struct sleep_save_sp sleep_save_sp; +phys_addr_t sleep_idmap_phys; static int __init cpu_suspend_init(void) { From af86e5974d3069bd26ebcf7c046c6e59726acaaa Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 21:50:42 +0000 Subject: [PATCH 50/70] arm64: Factor out fixmap initialization from ioremap The fixmap API was originally added for arm64 for early_ioremap purposes. It can be used for other purposes too so move the initialization from ioremap to somewhere more generic. This makes it obvious where the fixmap is being set up and allows for a cleaner implementation of __set_fixmap. 
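As an illustration of the API this factoring enables, a hypothetical early user of a fixmap slot might look like the sketch below. FIX_EARLYCON_MEM_BASE and FIXMAP_PAGE_IO are real definitions from fixmap.h; the function itself is invented for this example:

	static void __iomem *map_early_uart(phys_addr_t uart_phys)
	{
		/* Map the page containing the UART at its reserved fixed slot */
		__set_fixmap(FIX_EARLYCON_MEM_BASE, uart_phys & PAGE_MASK,
			     FIXMAP_PAGE_IO);
		/* Add back the offset of the registers within that page */
		return (void __iomem *)(fix_to_virt(FIX_EARLYCON_MEM_BASE) +
					(uart_phys & ~PAGE_MASK));
	}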
Reviewed-by: Kees Cook Acked-by: Mark Rutland Tested-by: Mark Rutland Tested-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 7 +-- arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/ioremap.c | 93 ++------------------------------ arch/arm64/mm/mmu.c | 94 +++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 93 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 5f7bfe6df723..db26a2f2b7a7 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -56,10 +56,11 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) -extern void __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); +void __init early_fixmap_init(void); -#define __set_fixmap __early_set_fixmap +#define __early_set_fixmap __set_fixmap + +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); #include diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c8629eb07ba6..1de0e8a895ee 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -384,6 +384,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_fixmap_init(); early_ioremap_init(); parse_early_param(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 4a07630a6616..cbb99c8f1e04 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - -static inline pud_t * __init early_ioremap_pud(unsigned long addr) -{ - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - - return pud_offset(pgd, addr); -} - -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) -{ - pud_t *pud = early_ioremap_pud(addr); - - BUG_ON(pud_none(*pud) || pud_bad(*pud)); - - return pmd_offset(pud, addr); -} - -static inline pte_t * __init early_ioremap_pte(unsigned long addr) -{ - pmd_t *pmd = early_ioremap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); -} - +/* + * Must be called after early_fixmap_init + */ void __init early_ioremap_init(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - - pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmd %p != %p\n", - pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", - FIX_BTMAP_BEGIN); - } - early_ioremap_setup(); } - -void __init 
__early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *pte; - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - - pte = early_ioremap_pte(addr); - - if (pgprot_val(flags)) - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); - else { - pte_clear(&init_mm, addr, pte); - flush_tlb_kernel_range(addr, addr+PAGE_SIZE); - } -} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f4f8b500f74c..6032f3e3056a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -463,3 +464,96 @@ void vmemmap_free(unsigned long start, unsigned long end) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif + +static inline pud_t * fixmap_pud(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + return pud_offset(pgd, addr); +} + +static inline pmd_t * fixmap_pmd(unsigned long addr) +{ + pud_t *pud = fixmap_pud(addr); + + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * fixmap_pte(unsigned long addr) +{ + pmd_t *pmd = fixmap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_fixmap_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr = FIXADDR_START; + + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p, %p\n", + pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} From 07c802bd7c3951410b105356787d655e273ab537 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Nov 2014 15:26:13 +0000 Subject: [PATCH 51/70] arm64: vmlinux.lds.S: don't discard .exit.* sections at link-time .exit.* sections may be subject to patching by the new alternatives framework and so shouldn't be discarded at link-time.
Without this patch, such a section will result in the following linker error: `.exit.text' referenced in section `.altinstructions' of drivers/built-in.o: defined in discarded section `.exit.text' of drivers/built-in.o Signed-off-by: Will Deacon --- arch/arm64/kernel/vmlinux.lds.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3236727be2b9..9965ec87cbec 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -11,8 +11,9 @@ #include "image.h" -#define ARM_EXIT_KEEP(x) -#define ARM_EXIT_DISCARD(x) x +/* .exit.text needed in case of alternative patching */ +#define ARM_EXIT_KEEP(x) x +#define ARM_EXIT_DISCARD(x) OUTPUT_ARCH(aarch64) ENTRY(_text) From c9453a3ab1a39230a18b3db1d677bbb2bd782baa Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Nov 2014 18:27:01 +0000 Subject: [PATCH 52/70] arm64: alternatives: fix pr_fmt string for consistency Consistently use the plural form for alternatives pr_fmt strings. Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 5a5226ffcbc8..fa62637e63a8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#define pr_fmt(fmt) "alternative: " fmt +#define pr_fmt(fmt) "alternatives: " fmt #include #include From dab78b6dcb2bfc90038f35ada826844273dde4d6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 26 Nov 2014 00:14:16 +0000 Subject: [PATCH 53/70] arm64: Add FIX_HOLE to permanent fixed addresses Every other architecture with permanent fixed addresses has FIX_HOLE as the first entry. This seems to be designed as a debugging aid but there are a couple of side effects of not having FIX_HOLE: - If the first fixed address is 0, fix_to_virt -> virt_to_fix triggers a BUG_ON for the virtual address being equal to FIXADDR_TOP - fix_to_virt may return a value outside of FIXADDR_START and FIXADDR_TOP which may look like a bug to a developer. Match up with other architectures and make everything clearer by adding FIX_HOLE. Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index db26a2f2b7a7..9ef6eca905ca 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -31,6 +31,7 @@ * */ enum fixed_addresses { + FIX_HOLE, FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, From c9465b4ec37a68425c5a574b56280dc1a7e34070 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 26 Nov 2014 00:28:39 +0000 Subject: [PATCH 54/70] arm64: add support to dump the kernel page tables In a similar manner to arm, it's useful to be able to dump the page tables to verify permissions and memory types. Add a debugfs file to check the page tables. 
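Once the file exists, inspecting the tables is a matter of reading it from debugfs; assuming the conventional mount point, usage would look something like:

	# mount -t debugfs none /sys/kernel/debug
	# cat /sys/kernel/debug/kernel_page_tables

Each line of output summarises one contiguous virtual range with identical attributes (permissions and memory type), as collated by note_page() in the code below.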
Acked-by: Steve Capper Tested-by: Steve Capper Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott [will: s/BUFFERABLE/NORMAL-NC/] Signed-off-by: Will Deacon --- arch/arm64/Kconfig.debug | 12 ++ arch/arm64/mm/Makefile | 1 + arch/arm64/mm/dump.c | 332 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 345 insertions(+) create mode 100644 arch/arm64/mm/dump.c diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 0a12933e50ed..5fdd6dce8061 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,6 +6,18 @@ config FRAME_POINTER bool default y +config ARM64_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS + help + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index c56179ed2c09..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -3,3 +3,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM64_PTDUMP) += dump.o diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c new file mode 100644 index 000000000000..bf69601be546 --- /dev/null +++ b/arch/arm64/mm/dump.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 and arm implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include + +#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +enum address_markers_idx { + VMALLOC_START_NR = 0, + VMALLOC_END_NR, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + VMEMMAP_START_NR, + VMEMMAP_END_NR, +#endif + PCI_START_NR, + PCI_END_NR, + FIXADDR_START_NR, + FIXADDR_END_NR, + MODULES_START_NR, + MODUELS_END_NR, + KERNEL_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + { VMALLOC_START, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + { 0, "vmemmap start" }, + { 0, "vmemmap end" }, +#endif + { (unsigned long) PCI_IOBASE, "PCI I/O start" }, + { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = PTE_USER, + .val = PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = PTE_RDONLY, + .val = PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = PTE_PXN, + .val = PTE_PXN, + .set = "NX", + .clear = "x ", + }, { + .mask = PTE_SHARED, + .val = PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = PTE_AF, + .val = PTE_AF, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_NG, + .val = PTE_NG, + .set = "NG", + .clear = " ", + }, { + .mask = PTE_UXN, + .val = PTE_UXN, + .set = "UXN", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), + .set = "DEVICE/nGnRnE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), + .set = "DEVICE/nGnRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_GRE), + .set = "DEVICE/GRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_NC), + .set = "MEM/NORMAL-NC", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL), + .set = "MEM/NORMAL", + } +}; + +struct pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pud */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pmd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, + size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, + u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < LOWEST_ADDR) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = 
units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%16lx-0x%16lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + seq_puts(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud)) + note_page(st, addr, 2, pud_val(*pud)); + else + walk_pmd(st, pud, addr); + } +} + +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +{ + pgd_t *pgd = pgd_offset(mm, 0); + unsigned i; + unsigned long addr; + + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = start + i * PGDIR_SIZE; + if (pgd_none(*pgd) || pgd_bad(*pgd)) + note_page(st, addr, 1, pgd_val(*pgd)); + else + walk_pud(st, pgd, addr); + } +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .marker = address_markers, + }; + + walk_pgd(&st, &init_mm, LOWEST_ADDR); + + note_page(&st, 0, 0, 0); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[VMEMMAP_START_NR].start_address = + (unsigned long)virt_to_page(PAGE_OFFSET); + address_markers[VMEMMAP_END_NR].start_address = + (unsigned long)virt_to_page(high_memory); + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 
0 : -ENOMEM; +} +device_initcall(ptdump_init); From 6ddae4186886a81e22ad78ad7c6936ed36bc8225 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 25 Nov 2014 15:44:18 +0000 Subject: [PATCH 55/70] arm64: jump labels: NOP out NOP -> NOP replacement In the arm64 arch_static_branch implementation we place an A64 NOP into the instruction stream and log relevant details to a jump_entry in a __jump_table section. Later this may be replaced with an immediate branch without link to the code for the unlikely case. At init time, the core calls arch_jump_label_transform_static to initialise the NOPs. On x86 this involves inserting the optimal NOP for a given microarchitecture, but on arm64 we only use the architectural NOP, and hence replace each NOP with the exact same NOP. This is somewhat pointless. Additionally, at module load time we don't call jump_label_apply_nops to patch the optimal NOPs in, unlike other architectures, but get away with this because we only use the architectural NOP anyway. A later notifier will patch NOPs with branches as required. Similarly to x86 commit 11570da1c5b1dee1 (x86/jump-label: Do not bother updating NOPs if they are correct), we can avoid patching NOPs with identical NOPs. Given that we only use a single NOP encoding, this means we can NOP-out the body of arch_jump_label_transform_static entirely. As the default __weak arch_jump_label_transform_static implementation performs a patch, we must use an empty function to achieve this. Cc: Catalin Marinas Cc: Jiang Liu Cc: Laura Abbott Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/jump_label.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 263a166291fb..4f1fec7a46db 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -22,9 +22,8 @@ #ifdef HAVE_JUMP_LABEL -static void __arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - bool is_static) +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)entry->code; u32 insn; @@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry, insn = aarch64_insn_gen_nop(); } - if (is_static) - aarch64_insn_patch_text_nosync(addr, insn); - else - aarch64_insn_patch_text(&addr, &insn, 1); -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - __arch_jump_label_transform(entry, type, false); + aarch64_insn_patch_text(&addr, &insn, 1); } void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - __arch_jump_label_transform(entry, type, true); + /* + * We use the architected A64 NOP in arch_static_branch, so there's no + * need to patch an identical A64 NOP over the top of it here. The core + * will call arch_jump_label_transform from a module notifier if the + * NOP needs to be replaced by a branch. + */ } #endif /* HAVE_JUMP_LABEL */ From 034edabe6cf1d0dea49d4c836ba128cec90fad04 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 13:50:41 -0800 Subject: [PATCH 56/70] arm64: Move some head.text functions to executable section The head.text section is intended to be run at early bootup before any of the regular kernel mappings have been setup. Parts of head.text may be freed back into the buddy allocator due to TEXT_OFFSET so for security requirements this memory must not be executable. 
The suspend/resume/hotplug code path requires some of these head.S functions to run however which means they need to be executable. Support these conflicting requirements by moving the few head.text functions that need to be executable to the text section which has the appropriate page table permissions. Tested-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 407 ++++++++++++++++++++------------------- 1 file changed, 209 insertions(+), 198 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index fddda8565a52..8ce88e08c030 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -248,7 +248,13 @@ ENTRY(stext) mov x0, x22 bl lookup_processor_type mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? + /* + * __error_p may end up out of range for cbz if text areas are + * aligned up to section sizes. + */ + cbnz x23, 1f // invalid processor (x23=0)? + b __error_p +1: bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -260,12 +266,213 @@ ENTRY(stext) */ ldr x27, __switch_data // address to jump to after // MMU has been enabled - adr lr, __enable_mmu // return (PIC) address + adrp lr, __enable_mmu // return (PIC) address + add lr, lr, #:lo12:__enable_mmu ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys br x12 // initialise processor ENDPROC(stext) +/* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. + */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) +/* + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). 
+ * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end + lsr \phys, \phys, #BLOCK_SHIFT + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + mov x0, x25 // idmap_pg_dir + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) + create_pgd_entry x0, x3, x5, x6 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the kernel image (starting with PHYS_OFFSET). + */ + mov x0, x26 // swapper_pg_dir + mov x5, #PAGE_OFFSET + create_pgd_entry x0, x5, x3, x6 + ldr x6, =KERNEL_END + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __bss_start // x6 + .quad __bss_stop // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. 
+ */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x6, x7, [x3], #16 +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * end early head section, begin head code that is also used for + * hotplug and needs to have the same protections as the text region + */ + .section ".text","ax" /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. @@ -502,183 +709,6 @@ ENDPROC(__calc_phys_offset) 1: .quad . .quad PAGE_OFFSET -/* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index - add \tmp2, \tbl, #PAGE_SIZE - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. - * - * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 - */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 -#endif - .endm - -/* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). - * - * Preserves: tbl, flags - * Corrupts: phys, start, end, pstate - */ - .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT - and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT - and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: str \phys, [\tbl, \start, lsl #3] // store the entry - add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block - cmp \start, \end - b.ls 9999b - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled, including the FDT blob (TTBR1) - * - pgd entry for fixed mappings (TTBR1) - */ -__create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses - mov x27, lr - - /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - /* - * Clear the idmap and swapper page tables. - */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b - - ldr x7, =MM_MMUFLAGS - - /* - * Create the identity mapping. 
- */ - mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END - mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET - create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END - mov x3, x24 // phys offset - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the FDT blob (maximum 2MB; must be within 512MB of - * PHYS_OFFSET). - */ - mov x3, x21 // FDT phys address - and x3, x3, #~((1 << 21) - 1) // 2MB aligned - mov x6, #PAGE_OFFSET - sub x5, x3, x24 // subtract PHYS_OFFSET - tst x5, #~((1 << 29) - 1) // within 512MB? - csel x21, xzr, x21, ne // zero the FDT pointer - b.ne 1f - add x5, x5, x6 // __va(FDT blob) - add x6, x5, #1 << 21 // 2MB for the FDT blob - sub x6, x6, #1 // inclusive range - create_block_map x0, x7, x3, x5, x6 -1: - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - mov lr, x27 - ret -ENDPROC(__create_page_tables) - .ltorg - - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - -/* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. - */ -__mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET - mov x29, #0 - b start_kernel -ENDPROC(__mmap_switched) - /* * Exception handling. Something went wrong and we can't proceed. We ought to * tell the user, but since we don't have any guarantee that we're even @@ -726,22 +756,3 @@ __lookup_processor_type_data: .quad . .quad cpu_table .size __lookup_processor_type_data, . - __lookup_processor_type_data - -/* - * Determine validity of the x21 FDT pointer. - * The dtb must be 8-byte aligned and live in the first 512M of memory. - */ -__vet_fdt: - tst x21, #0x7 - b.ne 1f - cmp x21, x24 - b.lt 1f - mov x0, #(1 << 29) - add x0, x0, x24 - cmp x21, x0 - b.ge 1f - ret -1: - mov x21, #0 - ret -ENDPROC(__vet_fdt) From 766a85d7bc5d7f1ddd6de28bdb844eae45ec63b0 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:34 +0000 Subject: [PATCH 57/70] arm64: ptrace: add NT_ARM_SYSTEM_CALL regset This regset is intended to be used to get and set a system call number while tracing.
There was some discussion about possible approaches to do so: (1) modify x8 register with ptrace(PTRACE_SETREGSET) indirectly, and update regs->syscallno later on in syscall_trace_enter(), or (2) define a dedicated regset for this purpose as on s390, or (3) support ptrace(PTRACE_SET_SYSCALL) as on arch/arm. Given that user_pt_regs doesn't expose 'syscallno' to the tracer, and that secure_computing() expects a changed syscall number, especially the case of -1, to be visible before this function returns in syscall_trace_enter(), (1) doesn't work well. We will take (2) since it looks much cleaner. Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 35 +++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 2 files changed, 36 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8a4ae8e73213..f576781d8d3b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -551,6 +551,32 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, return ret; } +static int system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int syscallno = task_pt_regs(target)->syscallno; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &syscallno, 0, -1); +} + +static int system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int syscallno, ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); + if (ret) + return ret; + + task_pt_regs(target)->syscallno = syscallno; + return ret; +} + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -559,6 +585,7 @@ enum aarch64_regset { REGSET_HW_BREAK, REGSET_HW_WATCH, #endif + REGSET_SYSTEM_CALL, }; static const struct user_regset aarch64_regsets[] = { @@ -608,6 +635,14 @@ static const struct user_regset aarch64_regsets[] = { .set = hw_break_set, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_ARM_SYSTEM_CALL, + .n = 1, + .size = sizeof(int), + .align = sizeof(int), + .get = system_call_get, + .set = system_call_set, + }, }; static const struct user_regset_view user_aarch64_view = { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index ea9bf2561b9e..71e1d0ed92f7 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -397,6 +397,7 @@ typedef struct elf64_shdr { #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ #define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ #define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ #define NT_METAG_TLS 0x502 /* Metag TLS pointer */ From 1014c81d9a5546b64352c04cdb93494aceb317fc Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:35 +0000 Subject: [PATCH 58/70] arm64: ptrace: allow tracer to skip a system call If a tracer modifies a syscall number to -1, the traced system call should be skipped, with a return value specified in x0. This patch implements these semantics. Please note: * syscall entry tracing and syscall exit tracing (ftrace tracepoint and audit) are always executed, if enabled, even when skipping a system call (that is, -1).
In this way, we can avoid a potential bug where audit_syscall_entry() might be called without audit_syscall_exit() having been called for the previous system call, which would cause an OOPS in audit_syscall_entry(). Signed-off-by: AKASHI Takahiro [will: fixed up conflict with blr rework] Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 99c8d13fc00d..fd4fa374e5d2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -669,8 +669,15 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x0, sp + mov w0, #-1 // set default errno for + cmp scno, x0 // user-issued syscall(-1) + b.ne 1f + mov x0, #-ENOSYS + str x0, [sp, #S_X0] +1: mov x0, sp bl syscall_trace_enter + cmp w0, #-1 // skip the syscall? + b.eq __sys_trace_return_skipped uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs cmp scno, sc_nr // check upper syscall limit @@ -683,7 +690,8 @@ __sys_trace: blr x16 // call sys_* routine __sys_trace_return: - str x0, [sp] // save returned x0 + str x0, [sp, #S_X0] // save returned x0 +__sys_trace_return_skipped: mov x0, sp bl syscall_trace_exit b ret_to_user From 65a2ae8d5bd0ab9fb5846c0223d5dcf74e87f9d2 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:36 +0000 Subject: [PATCH 59/70] asm-generic: add generic seccomp.h for secure computing mode 1 Those values (__NR_seccomp_*) are used solely in secure_computing() to identify mode 1 system calls. If compat system calls have different syscall numbers, asm/seccomp.h may override them. Acked-by: Arnd Bergmann Reviewed-by: Kees Cook Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- include/asm-generic/seccomp.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/asm-generic/seccomp.h diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h new file mode 100644 index 000000000000..9fa1f653ed3b --- /dev/null +++ b/include/asm-generic/seccomp.h @@ -0,0 +1,30 @@ +/* + * include/asm-generic/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_GENERIC_SECCOMP_H +#define _ASM_GENERIC_SECCOMP_H + +#include + +#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32) +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn +#endif /* CONFIG_COMPAT && ! already defined */ + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#ifndef __NR_seccomp_sigreturn +#define __NR_seccomp_sigreturn __NR_rt_sigreturn +#endif + +#endif /* _ASM_GENERIC_SECCOMP_H */ From e185fab7e13087cbd1f7e281d2698f07dbcdd44e Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:37 +0000 Subject: [PATCH 60/70] arm64: add seccomp syscall for compat task This patch allows a compat task to issue the seccomp() system call.
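With the syscall wired up below, a 32-bit binary can reach it in the usual way. A minimal user-space sketch, assuming libc headers new enough to define __NR_seccomp and SECCOMP_SET_MODE_STRICT (this is illustration, not part of the patch):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/seccomp.h>

	int main(void)
	{
		/* __NR_seccomp resolves to 383 in the compat table below */
		if (syscall(__NR_seccomp, SECCOMP_SET_MODE_STRICT, 0, NULL) != 0)
			perror("seccomp");
		/* In strict mode only read/write/exit/sigreturn remain allowed */
		return 0;
	}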
Reviewed-by: Kees Cook Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index da1f06b535e3..812f19212b23 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -787,7 +787,8 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 382 __SYSCALL(__NR_renameat2, sys_renameat2) - /* 383 for seccomp */ +#define __NR_seccomp 383 +__SYSCALL(__NR_seccomp, sys_seccomp) #define __NR_getrandom 384 __SYSCALL(__NR_getrandom, sys_getrandom) #define __NR_memfd_create 385 From cc5e9097c9aad6b186a568c534e26746d6bfa483 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:38 +0000 Subject: [PATCH 61/70] arm64: add SIGSYS siginfo for compat task SIGSYS is primarily used in secure computing to notify a tracer of syscall events. This patch allows a signal handler on a compat task to get correct information when this signal is delivered with SA_SIGINFO specified. Reviewed-by: Kees Cook Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/include/asm/compat.h | 7 +++++++ arch/arm64/kernel/signal32.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 56de5aadede2..3fb053fa6e98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -205,6 +205,13 @@ typedef struct compat_siginfo { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 1b9ad02837cf..5a1ba6e80d4e 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -186,6 +186,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); break; + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); From a1ae65b219416a72c15577bd4c8c11174fffbb8b Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:39 +0000 Subject: [PATCH 62/70] arm64: add seccomp support secure_computing() is called first in syscall_trace_enter() so that a system call will be aborted quickly, without performing any subsequent syscall tracing, if the seccomp rules deny that system call. On a compat task, syscall numbers for system calls allowed in seccomp mode 1 are different from those on normal tasks, and so the __NR_seccomp_xxx_32 values need to be redefined.
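For context, the filter mode that HAVE_ARCH_SECCOMP_FILTER enables below is driven from user space roughly as in this sketch; it is generic seccomp-BPF boilerplate, not arm64-specific and not part of the patch:

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/filter.h>
	#include <linux/seccomp.h>

	int main(void)
	{
		/* A trivial filter that allows every system call */
		struct sock_filter insns[] = {
			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
		};
		struct sock_fprog prog = {
			.len = sizeof(insns) / sizeof(insns[0]),
			.filter = insns,
		};

		if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
		    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
			perror("seccomp filter");
		return 0;
	}

Once such a filter is installed, the seccomp check runs on every syscall entry via syscall_trace_enter(), which is why the secure_computing() call in the hunk below must come before any other tracing.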
Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 14 ++++++++++++++ arch/arm64/include/asm/seccomp.h | 25 +++++++++++++++++++++++++ arch/arm64/include/asm/unistd.h | 3 +++ arch/arm64/kernel/ptrace.c | 5 +++++ 4 files changed, 47 insertions(+) create mode 100644 arch/arm64/include/asm/seccomp.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f5412d628ff6..7c79c6494379 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -38,6 +38,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -455,6 +456,19 @@ config ARCH_HAS_CACHE_LINE_SIZE source "mm/Kconfig" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + config XEN_DOM0 def_bool y depends on XEN diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h new file mode 100644 index 000000000000..c76fac979629 --- /dev/null +++ b/arch/arm64/include/asm/seccomp.h @@ -0,0 +1,25 @@ +/* + * arch/arm64/include/asm/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include + +#ifdef CONFIG_COMPAT +#define __NR_seccomp_read_32 __NR_compat_read +#define __NR_seccomp_write_32 __NR_compat_write +#define __NR_seccomp_exit_32 __NR_compat_exit +#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#endif /* CONFIG_COMPAT */ + +#include + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 6d2bf419431d..49c9aefd24a5 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -31,6 +31,9 @@ * Compat syscall numbers used by the AArch64 kernel. */ #define __NR_compat_restart_syscall 0 +#define __NR_compat_exit 1 +#define __NR_compat_read 3 +#define __NR_compat_write 4 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f576781d8d3b..d882b833dbdb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1149,6 +1150,10 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { + /* Do the secure computing check first; failures should be fast. 
*/ + if (secure_computing() == -1) + return -1; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); From a2d25a5391ca219f196f9fee7b535c40d201c6bf Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 1 Dec 2014 10:53:08 +0000 Subject: [PATCH 63/70] arm64: compat: align cacheflush syscall with arch/arm Update handling of cacheflush syscall with changes made in arch/arm counterpart: - return error to userspace when flushing syscall fails - split user cache-flushing into interruptible chunks - don't bother rounding to nearest vma Signed-off-by: Vladimir Murzin [will: changed internal return value from -EINTR to 0 to match arch/arm/] Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/sys_compat.c | 49 +++++++++++++++++------------ arch/arm64/mm/cache.S | 6 +++- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 689b6379188c..7ae31a2cc6c0 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -73,7 +73,7 @@ extern void flush_cache_all(void); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); -extern void __flush_cache_user_range(unsigned long start, unsigned long end); +extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) { diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index dc47e53e9e28..28c511b06edf 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -28,29 +28,39 @@ #include #include -static inline void -do_compat_cache_op(unsigned long start, unsigned long end, int flags) +static long +__do_compat_cache_op(unsigned long start, unsigned long end) { - struct mm_struct *mm = current->active_mm; - struct vm_area_struct *vma; + long ret; - if (end < start || flags) - return; + do { + unsigned long chunk = min(PAGE_SIZE, end - start); - down_read(&mm->mmap_sem); - vma = find_vma(mm, start); - if (vma && vma->vm_start < end) { - if (start < vma->vm_start) - start = vma->vm_start; - if (end > vma->vm_end) - end = vma->vm_end; - up_read(&mm->mmap_sem); - __flush_cache_user_range(start & PAGE_MASK, PAGE_ALIGN(end)); - return; - } - up_read(&mm->mmap_sem); + if (fatal_signal_pending(current)) + return 0; + + ret = __flush_cache_user_range(start, start + chunk); + if (ret) + return ret; + + cond_resched(); + start += chunk; + } while (start < end); + + return 0; } +static inline long +do_compat_cache_op(unsigned long start, unsigned long end, int flags) +{ + if (end < start || flags) + return -EINVAL; + + if (!access_ok(VERIFY_READ, start, end - start)) + return -EFAULT; + + return __do_compat_cache_op(start, end); +} /* * Handle all unrecognised system calls. */ @@ -74,8 +84,7 @@ long compat_arm_syscall(struct pt_regs *regs) * the specified region). 
*/ case __ARM_NR_compat_cacheflush: - do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); - return 0; + return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); case __ARM_NR_compat_set_tls: current->thread.tp_value = regs->regs[0]; diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 8eaf18577d71..2560e1e1562e 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -140,9 +141,12 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU add x4, x4, x2 cmp x4, x1 b.lo 1b -9: // ignore any faulting cache operation dsb ish isb + mov x0, #0 + ret +9: + mov x0, #-EFAULT ret ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) From e4f88d833bec29b8e6fadc1b2488f0c6370935e1 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Tue, 2 Dec 2014 17:49:24 +0000 Subject: [PATCH 64/70] arm64: Implement support for read-mostly sections By grouping together data which is mostly read, we can avoid unnecessary cache line bouncing. Other architectures, such as ARM and x86, have adopted the same idea. Acked-by: Catalin Marinas Signed-off-by: Jungseok Lee Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 88cc05b5f3ac..bde449936e2f 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -32,6 +32,8 @@ #ifndef __ASSEMBLY__ +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); From 51c9fbb1b146f3336a93d398c439b6fbfe5ab489 Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Wed, 3 Dec 2014 08:38:01 +0000 Subject: [PATCH 65/70] arm64: bpf: lift restriction on last instruction The earlier implementation assumed that the last instruction is BPF_EXIT. Since this is no longer a restriction in eBPF, we remove this limitation. Per Alexei Starovoitov [1]: > classic BPF has a restriction that last insn is always BPF_RET. > eBPF doesn't have BPF_RET instruction and this restriction. > It has BPF_EXIT insn which can appear anywhere in the program > one or more times and it doesn't have to be last insn. [1] https://lkml.org/lkml/2014/11/27/2 Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler") Acked-by: Alexei Starovoitov Signed-off-by: Zi Shen Lim Signed-off-by: Will Deacon --- arch/arm64/net/bpf_jit_comp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41f1e3e2ea24..edba042b2325 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -60,7 +60,7 @@ struct jit_ctx { const struct bpf_prog *prog; int idx; int tmp_used; - int body_offset; + int epilogue_offset; int *offset; u32 *image; }; @@ -130,8 +130,8 @@ static void jit_fill_hole(void *area, unsigned int size) static inline int epilogue_offset(const struct jit_ctx *ctx) { - int to = ctx->offset[ctx->prog->len - 1]; - int from = ctx->idx - ctx->body_offset; + int to = ctx->epilogue_offset; + int from = ctx->idx; return to - from; } @@ -463,6 +463,8 @@ emit_cond_jmp: } /* function return */ case BPF_JMP | BPF_EXIT: + /* Optimization: when last instruction is EXIT, + simply fallthrough to epilogue. */ if (i == ctx->prog->len - 1) break; jmp_offset = epilogue_offset(ctx); @@ -685,11 +687,13 @@ void bpf_int_jit_compile(struct bpf_prog *prog) /* 1. Initial fake pass to compute ctx->idx.
From 51c9fbb1b146f3336a93d398c439b6fbfe5ab489 Mon Sep 17 00:00:00 2001
From: Zi Shen Lim
Date: Wed, 3 Dec 2014 08:38:01 +0000
Subject: [PATCH 65/70] arm64: bpf: lift restriction on last instruction

The earlier implementation assumed the last instruction is always
BPF_EXIT. Since this is no longer a restriction in eBPF, we remove
this limitation.

Per Alexei Starovoitov [1]:

> classic BPF has a restriction that last insn is always BPF_RET.
> eBPF doesn't have BPF_RET instruction and this restriction.
> It has BPF_EXIT insn which can appear anywhere in the program
> one or more times and it doesn't have to be last insn.

[1] https://lkml.org/lkml/2014/11/27/2

Fixes: e54bcde3d69d ("arm64: eBPF JIT compiler")
Acked-by: Alexei Starovoitov
Signed-off-by: Zi Shen Lim
Signed-off-by: Will Deacon
---
 arch/arm64/net/bpf_jit_comp.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 41f1e3e2ea24..edba042b2325 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -60,7 +60,7 @@ struct jit_ctx {
 	const struct bpf_prog *prog;
 	int idx;
 	int tmp_used;
-	int body_offset;
+	int epilogue_offset;
 	int *offset;
 	u32 *image;
 };
@@ -130,8 +130,8 @@ static void jit_fill_hole(void *area, unsigned int size)
 
 static inline int epilogue_offset(const struct jit_ctx *ctx)
 {
-	int to = ctx->offset[ctx->prog->len - 1];
-	int from = ctx->idx - ctx->body_offset;
+	int to = ctx->epilogue_offset;
+	int from = ctx->idx;
 
 	return to - from;
 }
@@ -463,6 +463,8 @@ emit_cond_jmp:
 	}
 	/* function return */
 	case BPF_JMP | BPF_EXIT:
+		/* Optimization: when last instruction is EXIT,
+		   simply fallthrough to epilogue. */
 		if (i == ctx->prog->len - 1)
 			break;
 		jmp_offset = epilogue_offset(ctx);
@@ -685,11 +687,13 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 
 	/* 1. Initial fake pass to compute ctx->idx. */
 
-	/* Fake pass to fill in ctx->offset. */
+	/* Fake pass to fill in ctx->offset and ctx->tmp_used. */
 	if (build_body(&ctx))
 		goto out;
 
 	build_prologue(&ctx);
+
+	ctx.epilogue_offset = ctx.idx;
 	build_epilogue(&ctx);
 
 	/* Now we know the actual image size. */
@@ -706,7 +710,6 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 
 	build_prologue(&ctx);
 
-	ctx.body_offset = ctx.idx;
 	if (build_body(&ctx)) {
 		bpf_jit_binary_free(header);
 		goto out;

From 06f9eb884be81431d54d7d37390043e3b5b7f14a Mon Sep 17 00:00:00 2001
From: Fabio Estevam
Date: Thu, 4 Dec 2014 01:17:01 +0000
Subject: [PATCH 66/70] arm64: Provide a namespace to NCAPS

Building arm64.allmodconfig leads to the following warning:

usb/gadget/function/f_ncm.c:203:0: warning: "NCAPS" redefined
 #define NCAPS	(USB_CDC_NCM_NCAP_ETH_FILTER | USB_CDC_NCM_NCAP_CRC_MODE)
 ^
In file included from /home/build/work/batch/arch/arm64/include/asm/io.h:32:0,
                 from /home/build/work/batch/include/linux/clocksource.h:19,
                 from /home/build/work/batch/include/clocksource/arm_arch_timer.h:19,
                 from /home/build/work/batch/arch/arm64/include/asm/arch_timer.h:27,
                 from /home/build/work/batch/arch/arm64/include/asm/timex.h:19,
                 from /home/build/work/batch/include/linux/timex.h:65,
                 from /home/build/work/batch/include/linux/sched.h:19,
                 from /home/build/work/batch/arch/arm64/include/asm/compat.h:25,
                 from /home/build/work/batch/arch/arm64/include/asm/stat.h:23,
                 from /home/build/work/batch/include/linux/stat.h:5,
                 from /home/build/work/batch/include/linux/module.h:10,
                 from /home/build/work/batch/drivers/usb/gadget/function/f_ncm.c:19:
arch/arm64/include/asm/cpufeature.h:27:0: note: this is the location of the previous definition
 #define NCAPS	2

So add an ARM64 prefix to avoid such problems.

Reported-by: Olof's autobuilder
Signed-off-by: Fabio Estevam
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/cpufeature.h | 10 +++++-----
 arch/arm64/kernel/setup.c           |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 0362f8020d46..07547ccc1f2b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -24,11 +24,11 @@
 #define ARM64_WORKAROUND_CLEAN_CACHE		0
 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
 
-#define NCAPS					2
+#define ARM64_NCAPS				2
 
 #ifndef __ASSEMBLY__
 
-extern DECLARE_BITMAP(cpu_hwcaps, NCAPS);
+extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
 static inline bool cpu_have_feature(unsigned int num)
 {
@@ -37,16 +37,16 @@ static inline bool cpu_have_feature(unsigned int num)
 
 static inline bool cpus_have_cap(unsigned int num)
 {
-	if (num >= NCAPS)
+	if (num >= ARM64_NCAPS)
 		return false;
 	return test_bit(num, cpu_hwcaps);
 }
 
 static inline void cpus_set_cap(unsigned int num)
 {
-	if (num >= NCAPS)
+	if (num >= ARM64_NCAPS)
 		pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
-			num, NCAPS);
+			num, ARM64_NCAPS);
 	else
 		__set_bit(num, cpu_hwcaps);
 }

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 1de0e8a895ee..b80991166754 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -80,7 +80,7 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
 unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
-DECLARE_BITMAP(cpu_hwcaps, NCAPS);
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
 static const char *cpu_name;
 phys_addr_t __fdt_pointer __initdata;
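Aside from the rename, the guard in cpus_have_cap() is a pattern worth
noting: the capability index is validated against ARM64_NCAPS before the
bitmap is consulted. A standalone userspace sketch of the same guard
(hypothetical names, not the kernel implementation):

	#include <limits.h>
	#include <stdbool.h>

	#define ARM64_NCAPS	2
	#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

	static unsigned long cpu_hwcaps[(ARM64_NCAPS + BITS_PER_LONG - 1) / BITS_PER_LONG];

	/* Reject out-of-range indices before reading the bitmap. */
	static bool cpus_have_cap(unsigned int num)
	{
		if (num >= ARM64_NCAPS)
			return false;
		return (cpu_hwcaps[num / BITS_PER_LONG] >> (num % BITS_PER_LONG)) & 1;
	}
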
From af2c632e234f7158e891c27cc2270f8843f08855 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang
Date: Thu, 4 Dec 2014 06:29:35 +0000
Subject: [PATCH 67/70] arm64/include/asm: Fixed a warning about 'struct
 pt_regs'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If I include asm/irq.h at the top of my code and set ARCH=arm64, I get
the following compile warning:

warning: ‘struct pt_regs’ declared inside parameter list [enabled by default]

This patch is suggested by Arnd, see:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-December/308270.html

Signed-off-by: Chunyan Zhang
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/irq.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 1eebf5bb0b58..94c53674a31d 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -3,6 +3,8 @@
 
 #include <asm-generic/irq.h>
 
+struct pt_regs;
+
 extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));

From cbbf2e6ed7c2adabfa5cc64901c7b89e029d1e20 Mon Sep 17 00:00:00 2001
From: Daniel Thompson
Date: Fri, 21 Nov 2014 16:24:27 +0000
Subject: [PATCH 68/70] arm64: perf: Prevent wraparound during overflow

If the overflow threshold for a counter is set above or near the
0xffffffff boundary then the kernel may lose track of the overflow,
causing only events that occur *after* the overflow to be recorded.
Specifically, the problem occurs when the value of the performance
counter overtakes its original programmed value due to wraparound.

Typical solutions to this problem are either to avoid programming in
values likely to be overtaken, or to treat the overflow bit as the 33rd
bit of the counter.

It's somewhat fiddly to refactor the code to correctly handle the 33rd
bit during irqsave sections (context switches, for example), so instead
we take the simpler approach of avoiding values likely to be overtaken.

We set the limit to half of max_period because this matches the limit
imposed in __hw_perf_event_init(). This causes a doubling of the
interrupt rate for large threshold values; however, even with a very
fast counter ticking at 4GHz the interrupt rate would only be ~1Hz.

Signed-off-by: Daniel Thompson
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/perf_event.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index aa29ecb4f800..25a5308744b1 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -169,8 +169,14 @@ armpmu_event_set_period(struct perf_event *event,
 		ret = 1;
 	}
 
-	if (left > (s64)armpmu->max_period)
-		left = armpmu->max_period;
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
 
 	local64_set(&hwc->prev_count, (u64)-left);
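To make the clamping arithmetic concrete, here is a standalone sketch
(assuming a 32-bit up-counter that raises an interrupt on overflow; the
function name is hypothetical and this is not the kernel code) of how
capping 'left' at max_period/2 keeps the programmed start value safely
behind the running counter:

	#include <stdint.h>

	/*
	 * The counter counts up from the programmed value and interrupts
	 * on wrap, so we program (2^32 - left). Clamping 'left' to half
	 * of max_period leaves the counter well short of the overflow
	 * point even if the overflow interrupt is serviced late.
	 */
	static uint32_t clamped_start_value(int64_t left, uint64_t max_period)
	{
		if (left > (int64_t)(max_period >> 1))
			left = max_period >> 1;
		return (uint32_t)(0u - (uint32_t)left);	/* 2^32 - left, mod 2^32 */
	}
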
From 932ded4b0b9bf111fbf9d176ec12152a0d29b0fd Mon Sep 17 00:00:00 2001
From: Andre Przywara
Date: Fri, 28 Nov 2014 13:40:45 +0000
Subject: [PATCH 69/70] arm64: add module support for alternatives fixups

Currently the kernel patches all necessary instructions once at boot
time, so modules are not covered by this.

Change the apply_alternatives() function to take a beginning and an end
pointer and introduce a new variant (apply_alternatives_all()) to cover
the existing use case for the static kernel image section.

Add a module_finalize() function to arm64 to check for an alternatives
section in a module and patch only the instructions from that specific
area. Since the module code is not touched before module initialization
has ended, we don't need to halt the machine before patching the
module's code.

Signed-off-by: Andre Przywara
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/alternative.h |  3 ++-
 arch/arm64/kernel/alternative.c      | 29 ++++++++++++++++++++++----
 arch/arm64/kernel/module.c           | 18 +++++++++++++++++
 arch/arm64/kernel/smp.c              |  2 +-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index f6d206e7f9e9..d261f01e2bae 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -13,7 +13,8 @@ struct alt_instr {
 	u8  alt_len;		/* size of new instruction(s), <= orig_len */
 };
 
-void apply_alternatives(void);
+void apply_alternatives_all(void);
+void apply_alternatives(void *start, size_t length);
 void free_alternatives_memory(void);
 
 #define ALTINSTR_ENTRY(feature)					      \

diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 1a3badab800a..ad7821d64a1d 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -28,12 +28,18 @@
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 
-static int __apply_alternatives(void *dummy)
+struct alt_region {
+	struct alt_instr *begin;
+	struct alt_instr *end;
+};
+
+static int __apply_alternatives(void *alt_region)
 {
 	struct alt_instr *alt;
+	struct alt_region *region = alt_region;
 	u8 *origptr, *replptr;
 
-	for (alt = __alt_instructions; alt < __alt_instructions_end; alt++) {
+	for (alt = region->begin; alt < region->end; alt++) {
 		if (!cpus_have_cap(alt->cpufeature))
 			continue;
 
@@ -51,10 +57,25 @@ static int __apply_alternatives(void *dummy)
 	return 0;
 }
 
-void apply_alternatives(void)
+void apply_alternatives_all(void)
 {
+	struct alt_region region = {
+		.begin	= __alt_instructions,
+		.end	= __alt_instructions_end,
+	};
+
 	/* better not try code patching on a live SMP system */
-	stop_machine(__apply_alternatives, NULL, NULL);
+	stop_machine(__apply_alternatives, &region, NULL);
+}
+
+void apply_alternatives(void *start, size_t length)
+{
+	struct alt_region region = {
+		.begin	= start,
+		.end	= start + length,
+	};
+
+	__apply_alternatives(&region);
 }
 
 void free_alternatives_memory(void)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 1eb1cc955139..fd027b101de5 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -26,6 +26,7 @@
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>
 #include <asm/insn.h>
+#include <asm/alternative.h>
 
 #define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX
 #define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16
@@ -394,3 +395,20 @@ overflow:
 	       me->name, (int)ELF64_R_TYPE(rel[i].r_info), val);
 	return -ENOEXEC;
 }
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s, *se;
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+		if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
+			apply_alternatives((void *)s->sh_addr, s->sh_size);
+			return 0;
+		}
+	}
+
+	return 0;
+}

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 0ef87896e4ae..7ae6ee085261 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -310,7 +310,7 @@ void cpu_die(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
-	apply_alternatives();
+	apply_alternatives_all();
 }
 
 void __init smp_prepare_boot_cpu(void)
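The module_finalize() hook above follows a generic pattern: walk the
module's ELF section headers, find a section by name, and hand its
address range to a patcher. A simplified userspace sketch of that
pattern (types and names are hypothetical, not the kernel API):

	#include <stddef.h>
	#include <string.h>

	struct section {
		const char *name;
		void	   *addr;
		size_t	    size;
	};

	/* Find a named section and apply a patch callback to its byte range. */
	static void patch_named_section(const struct section *s, size_t nsections,
					const char *wanted,
					void (*apply)(void *start, size_t length))
	{
		for (size_t i = 0; i < nsections; i++) {
			if (strcmp(s[i].name, wanted) == 0) {
				apply(s[i].addr, s[i].size);	/* e.g. ".altinstructions" */
				return;
			}
		}
	}
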
From eb8a653137b7e74f7cdc01f814eb9d094a65aed9 Mon Sep 17 00:00:00 2001
From: Ding Tianhong
Date: Fri, 5 Dec 2014 08:39:46 +0000
Subject: [PATCH 70/70] arm64: remove the unnecessary arm64_swiotlb_init()

Commit 3690951fc6d42f3a0903987677d0e592c49dd8db (arm64: Use swiotlb
late initialisation) switched the DMA mapping code over to
swiotlb_tlb_late_init_with_default_size(), so arm64_swiotlb_init() is
no longer used. Remove the function.

Signed-off-by: Ding Tianhong
Signed-off-by: Will Deacon
---
 arch/arm64/mm/mm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index d519f4f50c8c..50c3351df9c7 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,2 +1 @@
 extern void __init bootmem_init(void);
-extern void __init arm64_swiotlb_init(void);