From b660950c60a7278f9d8deb7c32a162031207c758 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Fri, 18 Mar 2016 10:58:09 +0100
Subject: [PATCH 1/7] arm64/kernel: fix incorrect EL0 check in inv_entry macro

The implementation of macro inv_entry refers to its 'el' argument without
the required leading backslash, which results in an undefined symbol 'el'
being passed into the kernel_entry macro rather than the index of the
exception level as intended.

This undefined symbol strangely enough does not result in build failures,
although it is visible in vmlinux:

     $ nm -n vmlinux |head
                      U el
     0000000000000000 A _kernel_flags_le_hi32
     0000000000000000 A _kernel_offset_le_hi32
     0000000000000000 A _kernel_size_le_hi32
     000000000000000a A _kernel_flags_le_lo32
     .....

However, it does result in incorrect code being generated for invalid
exceptions taken from EL0, since the argument check in kernel_entry
assumes EL1 if its argument does not equal '0'.

Signed-off-by: Ard Biesheuvel
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1f7f5a2b61bf..12e8d2bcb3f9 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -277,7 +277,7 @@ END(vectors)
  * Invalid mode handlers
  */
 	.macro	inv_entry, el, reason, regsize = 64
-	kernel_entry el, \regsize
+	kernel_entry \el, \regsize
 	mov	x0, sp
 	mov	x1, #\reason
 	mrs	x2, esr_el1

From b90b4a608ea2401cc491828f7a385edd2e236e37 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 15 Mar 2016 11:22:57 +0000
Subject: [PATCH 2/7] arm64: fix KASLR boot-time I-cache maintenance

Commit f80fb3a3d50843a4 ("arm64: add support for kernel ASLR") missed a
DSB necessary to complete I-cache maintenance in the primary boot path,
and hence stale instructions may still be present in the I-cache and may
be executed until the I-cache maintenance naturally completes.

Since commit 8ec41987436d566f ("arm64: mm: ensure patched kernel text is
fetched from PoU"), all CPUs invalidate their I-caches after their MMU is
enabled. Prior to a CPU's MMU having been enabled, arbitrary lines may
have been fetched from the PoC into I-caches. We never patch text
expected to be executed with the MMU off. Thus, it is unnecessary to
perform broadcast I-cache maintenance in the primary boot path.

This patch reduces the scope of the I-cache maintenance to the local CPU,
and adds the missing DSB with similar scope, matching prior maintenance
in the primary boot path.

Signed-off-by: Mark Rutland
Acked-by: Ard Biesheuvel
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/head.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 50c2134a4aaf..1672ca9e963b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -766,8 +766,9 @@ __enable_mmu:
 
 	msr	sctlr_el1, x19			// re-enable the MMU
 	isb
-	ic	ialluis				// flush instructions fetched
-	isb					// via old mapping
+	ic	iallu				// flush instructions fetched
+	dsb	nsh				// via old mapping
+	isb
 	add	x27, x27, x23			// relocated __mmap_switched
 #endif
 	br	x27
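For context on the local versus broadcast maintenance used above:
arch/arm64/include/asm/cacheflush.h carries C-level helpers for both
sequences. The local variant below is exactly the helper removed as unused
by the next patch; the broadcast variant stays in the header. The tail of
__flush_icache_all() (the dsb/isb lines) is quoted here from that header
for reference only and is not part of these patches.

static inline void __local_flush_icache_all(void)
{
	asm("ic iallu");	/* invalidate the I-cache of this CPU only */
	dsb(nsh);		/* complete the maintenance, non-shareable scope */
	isb();			/* resynchronise the instruction stream */
}

static inline void __flush_icache_all(void)
{
	asm("ic ialluis");	/* invalidate I-caches, Inner Shareable broadcast */
	dsb(ish);		/* complete the maintenance across the domain */
	isb();
}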
From cc7c0cda8f8c03d6867408e3df953b2fa67d704c Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Tue, 15 Mar 2016 17:47:10 +0800
Subject: [PATCH 3/7] arm64: drop unused __local_flush_icache_all()

After commit 65da0a8e34a8 ("arm64: use non-global mappings for UEFI
runtime regions"), nobody uses __local_flush_icache_all() anymore, so
drop it.

Signed-off-by: Kefeng Wang
Acked-by: Mark Rutland
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/cacheflush.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7fc294c3bc5b..6a5ecbddcace 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -116,13 +116,6 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-static inline void __local_flush_icache_all(void)
-{
-	asm("ic iallu");
-	dsb(nsh);
-	isb();
-}
-
 static inline void __flush_icache_all(void)
 {
 	asm("ic ialluis");

From f09f1bacfe2b1e64a5d94bc2711f73b654c95514 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Fri, 11 Mar 2016 17:39:25 +0000
Subject: [PATCH 4/7] arm64: Split pr_notice("Virtual kernel memory
 layout...") into multiple pr_cont()

The printk() implementation has a per-call buffer limit of LOG_LINE_MAX
(== 1024 - 32), which the arm64 mem_init() breaches when printing the
virtual memory layout with CONFIG_KASAN enabled. The result is that the
last line is no longer printed.

This patch splits the call into a pr_notice() + additional pr_cont()
calls.

Signed-off-by: Catalin Marinas
Acked-by: Mark Rutland
---
 arch/arm64/mm/init.c | 60 +++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8c3d7dd91c25..d09603d0e5e9 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -362,42 +362,38 @@ void __init mem_init(void)
 #define MLG(b, t) b, t, ((t) - (b)) >> 30
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
-	pr_notice("Virtual kernel memory layout:\n"
+	pr_notice("Virtual kernel memory layout:\n");
 #ifdef CONFIG_KASAN
-		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+	pr_cont("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+		MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
 #endif
-		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
-		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+	pr_cont("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+		MLM(MODULES_VADDR, MODULES_END));
+	pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+		MLG(VMALLOC_START, VMALLOC_END));
+	pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		"    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		"      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		"      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		MLK_ROUNDUP(_text, __start_rodata),
+		MLK_ROUNDUP(__start_rodata, _etext),
+		MLK_ROUNDUP(__init_begin, __init_end),
+		MLK_ROUNDUP(_sdata, _edata));
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
-		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
+	pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
+		"              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
+		MLG((unsigned long)vmemmap,
+		    (unsigned long)vmemmap + VMEMMAP_SIZE),
+		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
+		    (unsigned long)virt_to_page(high_memory)));
 #endif
-		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
-		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
-#ifdef CONFIG_KASAN
-		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
-#endif
-		  MLM(MODULES_VADDR, MODULES_END),
-		  MLG(VMALLOC_START, VMALLOC_END),
-		  MLK_ROUNDUP(_text, __start_rodata),
-		  MLK_ROUNDUP(__start_rodata, _etext),
-		  MLK_ROUNDUP(__init_begin, __init_end),
-		  MLK_ROUNDUP(_sdata, _edata),
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  MLG((unsigned long)vmemmap,
-		      (unsigned long)vmemmap + VMEMMAP_SIZE),
-		  MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
-		      (unsigned long)virt_to_page(high_memory)),
-#endif
-		  MLK(FIXADDR_START, FIXADDR_TOP),
-		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(__phys_to_virt(memblock_start_of_DRAM()),
-		      (unsigned long)high_memory));
+	pr_cont("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
+		MLK(FIXADDR_START, FIXADDR_TOP));
+	pr_cont("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+		MLM(PCI_IO_START, PCI_IO_END));
+	pr_cont("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+		MLM(__phys_to_virt(memblock_start_of_DRAM()),
+		    (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM
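To spell out the pattern used in the hunk above: pr_notice() emits one log
record at notice level, and each subsequent pr_cont() continues the console
output rather than starting a new record, so the LOG_LINE_MAX limit applies
per call instead of to the whole layout dump. A minimal sketch of the same
idea follows; print_layout_example() and its arguments are made up for
illustration and are not part of the patch.

#include <linux/init.h>
#include <linux/printk.h>

static void __init print_layout_example(unsigned long start, unsigned long end)
{
	/* One short pr_notice() opens the dump... */
	pr_notice("Virtual kernel memory layout:\n");
	/* ...and each region is appended with its own pr_cont() call. */
	pr_cont("    region A : 0x%16lx - 0x%16lx   (%6ld MB)\n",
		start, end, (long)((end - start) >> 20));
	pr_cont("    region B : 0x%16lx - 0x%16lx   (%6ld KB)\n",
		start, end, (long)((end - start) >> 10));
}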
From d5e5743797adc26e09db262e7a4b6b81d8c0f45c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 21 Mar 2016 18:35:11 +0100
Subject: [PATCH 5/7] arm64: kaslr: use callee saved register to preserve
 SCTLR across C call

The KASLR code incorrectly expects the contents of x18 to be preserved
across a call into C code, and uses it to stash the contents of SCTLR_EL1
before enabling the MMU. If the MMU needs to be disabled again to create
the randomized kernel mapping, x18 is written back to SCTLR_EL1, which is
likely to crash the system if x18 has been clobbered by kasan_early_init()
or kaslr_early_init(). So use x22 instead, which is not in use so far in
head.S.

Signed-off-by: Ard Biesheuvel
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/head.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 1672ca9e963b..a2cf0e3aa1ce 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -732,7 +732,7 @@ ENTRY(__early_cpu_boot_status)
  */
 	.section	".idmap.text", "ax"
 __enable_mmu:
-	mrs	x18, sctlr_el1			// preserve old SCTLR_EL1 value
+	mrs	x22, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -760,7 +760,7 @@ __enable_mmu:
 	 * to take into account by discarding the current kernel mapping and
 	 * creating a new one.
 	 */
-	msr	sctlr_el1, x18			// disable the MMU
+	msr	sctlr_el1, x22			// disable the MMU
 	isb
 	bl	__create_page_tables		// recreate kernel mapping

From c661cb1c537e2364bfdabb298fb934fd77445e98 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 22 Mar 2016 10:11:45 +0000
Subject: [PATCH 6/7] arm64: consistently use p?d_set_huge

Commit 324420bf91f60582 ("arm64: add support for ioremap() block
mappings") added new p?d_set_huge functions which do the hard work to
generate and set a correct block entry.

These differ from open-coded huge page creation in the early page table
code by explicitly setting the P?D_TYPE_SECT bits (which are implicitly
retained by mk_sect_prot() for any valid prot), but are otherwise
identical (and cannot fail on arm64).

For simplicity and consistency, make use of these in the initial page
table creation code.

Signed-off-by: Mark Rutland
Cc: Ard Biesheuvel
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/mmu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d2d8b8c2e17f..f3e5c74233f3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -211,8 +211,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
 		      block_mappings_allowed(pgtable_alloc)) {
 			pmd_t old_pmd =*pmd;
-			set_pmd(pmd, __pmd(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pmd_set_huge(pmd, phys, prot);
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -272,8 +271,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		if (use_1G_block(addr, next, phys) &&
 		    block_mappings_allowed(pgtable_alloc)) {
 			pud_t old_pud = *pud;
-			set_pud(pud, __pud(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pud_set_huge(pud, phys, prot);
 
 			/*
 			 * If we have an old value for a pud, it will
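To make the "otherwise identical" claim above concrete: mk_sect_prot()
merely clears the table bit from the given prot, so any valid prot already
encodes a section (block) descriptor, and p?d_set_huge() just sets the
P?D_TYPE_SECT bits explicitly before installing the entry. The sketch below
is illustrative only: the mk_sect_prot() body follows the arm64 header
definition, while the pmd_set_huge() body is a simplified paraphrase rather
than verbatim kernel source.

static inline pgprot_t mk_sect_prot(pgprot_t prot)
{
	/* Clearing the table bit leaves a section (block) descriptor type. */
	return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
}

/* Simplified paraphrase of pmd_set_huge(): same entry, type bits explicit. */
int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
				      pgprot_val(mk_sect_prot(prot)));

	set_pmd(pmdp, __pmd(phys | pgprot_val(sect_prot)));
	return 1;			/* cannot fail on arm64 */
}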
From 691b1e2ebf727167a2e3cdcd1ea0851dee10247b Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 22 Mar 2016 10:11:28 +0000
Subject: [PATCH 7/7] arm64: mm: allow preemption in copy_to_user_page

Currently we disable preemption in copy_to_user_page, a behaviour that we
inherited from the 32-bit arm code. This was necessary for older cores
without broadcast data cache maintenance, and ensured that cache lines
were dirtied and cleaned by the same CPU. On these systems dirty cache
line migration was not possible, so this was sufficient to guarantee
coherency.

On contemporary systems, cache coherence protocols permit (dirty) cache
lines to migrate between CPUs as a result of speculation, prefetching, and
other behaviours. To account for this, in ARMv8 data cache maintenance
operations are broadcast and affect all data caches in the domain
associated with the VA (i.e. ISH for kernel and user mappings).

In __switch_to we ensure that tasks can be safely migrated in the middle
of a maintenance sequence, using a dsb(ish) to ensure prior explicit
memory accesses are observed and cache maintenance operations are
completed before a task can be run on another CPU.

Given the above, it is not necessary to disable preemption in
copy_to_user_page. This patch removes the preempt_{disable,enable} calls,
permitting preemption.

Signed-off-by: Mark Rutland
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/flush.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 60585bde1264..dbd12ea8ce68 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -58,17 +58,13 @@ static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
  * Copy user data from/to a page which is mapped into a different processes
  * address space. Really, we want to allow our "user space" model to handle
  * this.
- *
- * Note that this code needs to run on the current CPU.
  */
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long uaddr, void *dst, const void *src,
 		       unsigned long len)
 {
-	preempt_disable();
 	memcpy(dst, src, len);
 	flush_ptrace_access(vma, page, uaddr, dst, len);
-	preempt_enable();
 }
 
 void __sync_icache_dcache(pte_t pte, unsigned long addr)