From 4369ae16eec16e6a922d4333da88a58fbc201369 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2008 13:23:06 +0000 Subject: [PATCH 1/9] Add "thumbee" to the hwcap_str array This part was missed in the initial patch adding ThumbEE support. Signed-off-by: Catalin Marinas --- arch/arm/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1f1eecca7f55..66e1a643ed14 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -772,6 +772,7 @@ static const char *hwcap_str[] = { "java", "iwmmxt", "crunch", + "thumbee", NULL }; From 2bedbdf4148ebbe48c7a89449ab52e475a788f42 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2008 13:23:07 +0000 Subject: [PATCH 2/9] Add HWCAP_NEON to the ARM hwcap.h file Signed-off-by: Catalin Marinas --- arch/arm/include/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 1 + arch/arm/vfp/vfpmodule.c | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h index 81f4c899a555..bda489f9f017 100644 --- a/arch/arm/include/asm/hwcap.h +++ b/arch/arm/include/asm/hwcap.h @@ -16,6 +16,7 @@ #define HWCAP_IWMMXT 512 #define HWCAP_CRUNCH 1024 #define HWCAP_THUMBEE 2048 +#define HWCAP_NEON 4096 #if defined(__KERNEL__) && !defined(__ASSEMBLY__) /* diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 66e1a643ed14..d4dae3e9b294 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -773,6 +773,7 @@ static const char *hwcap_str[] = { "iwmmxt", "crunch", "thumbee", + "neon", NULL }; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index c0d2c9bb952b..67ca340a7c85 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -371,6 +371,15 @@ static int __init vfp_init(void) * in place; report VFP support to userspace. */ elf_hwcap |= HWCAP_VFP; +#ifdef CONFIG_NEON + /* + * Check for the presence of the Advanced SIMD + * load/store instructions, integer and single + * precision floating point operations. + */ + if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100) + elf_hwcap |= HWCAP_NEON; +#endif } return 0; } From c30c2f99e10b6a810dae9a25b35c6d48796d8ffb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2008 13:23:07 +0000 Subject: [PATCH 3/9] ARMv7: Add extra barriers for flush_cache_all compressed/head.S The flush_cache_all function on ARMv7 is implemented as a series of cache operations by set/way. These are not guaranteed to be ordered with previous memory accesses, requiring a DMB. This patch also adds barriers for the TLB operations in compressed/head.S Signed-off-by: Catalin Marinas --- arch/arm/boot/compressed/head.S | 14 ++++++++++---- arch/arm/mm/cache-v7.S | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 84a1e0496a3c..7b1f31295a0a 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -717,6 +717,9 @@ __armv7_mmu_cache_off: bl __armv7_mmu_cache_flush mov r0, #0 mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC + mcr p15, 0, r0, c7, c10, 4 @ DSB + mcr p15, 0, r0, c7, c5, 4 @ ISB mov pc, r12 __arm6_mmu_cache_off: @@ -778,12 +781,13 @@ __armv6_mmu_cache_flush: __armv7_mmu_cache_flush: mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1 tst r10, #0xf << 16 @ hierarchical cache (ARMv7) - beq hierarchical mov r10, #0 + beq hierarchical mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D b iflush hierarchical: - stmfd sp!, {r0-r5, r7, r9-r11} + mcr p15, 0, r10, c7, c10, 5 @ DMB + stmfd sp!, {r0-r5, r7, r9, r11} mrc p15, 1, r0, c0, c0, 1 @ read clidr ands r3, r0, #0x7000000 @ extract loc from clidr mov r3, r3, lsr #23 @ left align loc bit field @@ -820,12 +824,14 @@ skip: cmp r3, r10 bgt loop1 finished: + ldmfd sp!, {r0-r5, r7, r9, r11} mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - ldmfd sp!, {r0-r5, r7, r9-r11} iflush: + mcr p15, 0, r10, c7, c10, 4 @ DSB mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB - mcr p15, 0, r10, c7, c10, 4 @ drain WB + mcr p15, 0, r10, c7, c10, 4 @ DSB + mcr p15, 0, r10, c7, c5, 4 @ ISB mov pc, lr __armv5tej_mmu_cache_flush: diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index d19c2bec2b1f..be93ff02a98d 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -26,6 +26,7 @@ * - mm - mm_struct describing address space */ ENTRY(v7_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr ands r3, r0, #0x7000000 @ extract loc from clidr mov r3, r3, lsr #23 @ left align loc bit field @@ -64,6 +65,7 @@ skip: finished: mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + dsb isb mov pc, lr ENDPROC(v7_flush_dcache_all) From 24b647a042b988b017e6cdf60b47a0bfecd1dc41 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2008 13:23:08 +0000 Subject: [PATCH 4/9] ARMv7: Branch over conditional undefined instructions in vfphw.S On ARMv7, conditional undefined instructions may generate exceptions even if the condition is not met. The vfphw.S contains the FPINST and FPINST2 access instructions which may not be present on processors with synchronous VFP exceptions. Signed-off-by: Catalin Marinas --- arch/arm/vfp/vfphw.S | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index a62dcf7098ba..3c73aafe3e01 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -101,9 +101,12 @@ ENTRY(vfp_support_entry) VFPFSTMIA r4, r5 @ save the working registers VFPFMRX r5, FPSCR @ current status tst r1, #FPEXC_EX @ is there additional state to save? - VFPFMRX r6, FPINST, NE @ FPINST (only if FPEXC.EX is set) - tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read? - VFPFMRX r8, FPINST2, NE @ FPINST2 if needed (and present) + beq 1f + VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set) + tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? + beq 1f + VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present) +1: stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 @ and point r4 at the word at the @ start of the register dump @@ -117,9 +120,12 @@ no_old_VFP_process: @ FPEXC is in a safe state ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 tst r1, #FPEXC_EX @ is there additional state to restore? - VFPFMXR FPINST, r6, NE @ restore FPINST (only if FPEXC.EX is set) - tstne r1, #FPEXC_FP2V @ is there an FPINST2 to write? - VFPFMXR FPINST2, r8, NE @ FPINST2 if needed (and present) + beq 1f + VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set) + tst r1, #FPEXC_FP2V @ is there an FPINST2 to write? + beq 1f + VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present) +1: VFPFMXR FPSCR, r5 @ restore status check_for_exception: @@ -175,9 +181,12 @@ ENTRY(vfp_save_state) VFPFSTMIA r0, r2 @ save the working registers VFPFMRX r2, FPSCR @ current status tst r1, #FPEXC_EX @ is there additional state to save? - VFPFMRX r3, FPINST, NE @ FPINST (only if FPEXC.EX is set) - tstne r1, #FPEXC_FP2V @ is there an FPINST2 to read? - VFPFMRX r12, FPINST2, NE @ FPINST2 if needed (and present) + beq 1f + VFPFMRX r3, FPINST @ FPINST (only if FPEXC.EX is set) + tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? + beq 1f + VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present) +1: stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 mov pc, lr ENDPROC(vfp_save_state) From 376e14218d3d791127e9b9bfbe2f99c44c2a19c2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2008 13:23:08 +0000 Subject: [PATCH 5/9] Do not flush the cache in flush_cache_v(un)map for VIPT caches In case of non-aliasing VIPT caches, there is no need to flush the whole cache when new mapping is created. The patch introduces this condition check. In the non-aliasing VIPT case flush_cache_vmap() needs a DSB since the set_pte_at() function called from vmap_pte_range() does not have such barrier (done usually via TLB flushing functions). Signed-off-by: Catalin Marinas --- arch/arm/include/asm/cacheflush.h | 36 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index de6c59f814a1..85a2514cbffc 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -15,6 +15,7 @@ #include #include +#include #define CACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT) @@ -295,16 +296,6 @@ static inline void outer_flush_range(unsigned long start, unsigned long end) #endif -/* - * flush_cache_vmap() is used when creating mappings (eg, via vmap, - * vmalloc, ioremap etc) in kernel space for pages. Since the - * direct-mappings of these pages may contain cached data, we need - * to do a full cache flush to ensure that writebacks don't corrupt - * data placed into these pages via the new mappings. - */ -#define flush_cache_vmap(start, end) flush_cache_all() -#define flush_cache_vunmap(start, end) flush_cache_all() - /* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user @@ -444,4 +435,29 @@ static inline void flush_ioremap_region(unsigned long phys, void __iomem *virt, dmac_inv_range(start, start + size); } +/* + * flush_cache_vmap() is used when creating mappings (eg, via vmap, + * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT + * caches, since the direct-mappings of these pages may contain cached + * data, we need to do a full cache flush to ensure that writebacks + * don't corrupt data placed into these pages via the new mappings. + */ +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ + if (!cache_is_vipt_nonaliasing()) + flush_cache_all(); + else + /* + * set_pte_at() called from vmap_pte_range() does not + * have a DSB after cleaning the cache line. + */ + dsb(); +} + +static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +{ + if (!cache_is_vipt_nonaliasing()) + flush_cache_all(); +} + #endif From 6b07d7fea0496374ff7754dc3d1dca03b2911828 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2008 13:23:08 +0000 Subject: [PATCH 6/9] ARMv7: Do not set TTBR0 in __v7_setup This register is set in __enable_mmu in the head.S file. Signed-off-by: Catalin Marinas --- arch/arm/mm/proc-v7.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 07f82db70945..41772960fd10 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -175,7 +175,6 @@ __v7_setup: mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r10, c2, c0, 2 @ TTB control register orr r4, r4, #TTB_RGN_OC_WB @ mark PTWs outer cacheable, WB - mcr p15, 0, r4, c2, c0, 0 @ load TTB0 mcr p15, 0, r4, c2, c0, 1 @ load TTB1 mov r10, #0x1f @ domains 0, 1 = manager mcr p15, 0, r10, c3, c0, 0 @ load domain access register From 73b63efaac7352c9e2bf1570fac98fd44a99f8f9 Mon Sep 17 00:00:00 2001 From: Jon Callan Date: Thu, 6 Nov 2008 13:23:09 +0000 Subject: [PATCH 7/9] ARMv7: Add SMP initialisation to proc-v7.S This patch adds the SMP/nAMP mode setting to __v7_setup and also sets TTBR to shared page table walks if SMP is enabled. The PTWs are also marked inner cacheable for both SMP and UP modes (setting this is fine even if the CPU doesn't support the feature). Signed-off-by: Jon Callan Signed-off-by: Catalin Marinas --- arch/arm/mm/proc-v7.S | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 41772960fd10..721b7d53bfd8 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -20,9 +20,17 @@ #define TTB_C (1 << 0) #define TTB_S (1 << 1) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_OC_WBWA (1 << 3) #define TTB_RGN_OC_WT (2 << 3) #define TTB_RGN_OC_WB (3 << 3) +#ifndef CONFIG_SMP +#define TTB_FLAGS TTB_C|TTB_RGN_OC_WB @ mark PTWs cacheable, outer WB +#else +#define TTB_FLAGS TTB_C|TTB_S|TTB_RGN_OC_WBWA @ mark PTWs cacheable and shared, outer WBWA +#endif + ENTRY(cpu_v7_proc_init) mov pc, lr ENDPROC(cpu_v7_proc_init) @@ -85,7 +93,7 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mov r2, #0 ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id - orr r0, r0, #TTB_RGN_OC_WB @ mark PTWs outer cacheable, WB + orr r0, r0, #TTB_FLAGS mcr p15, 0, r2, c13, c0, 1 @ set reserved context ID isb 1: mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 @@ -162,6 +170,11 @@ cpu_v7_name: * - cache type register is implemented */ __v7_setup: +#ifdef CONFIG_SMP + mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode + orr r0, r0, #(0x1 << 6) + mcr p15, 0, r0, c1, c0, 1 +#endif adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} bl v7_flush_dcache_all @@ -174,7 +187,7 @@ __v7_setup: #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r10, c2, c0, 2 @ TTB control register - orr r4, r4, #TTB_RGN_OC_WB @ mark PTWs outer cacheable, WB + orr r4, r4, #TTB_FLAGS mcr p15, 0, r4, c2, c0, 1 @ load TTB1 mov r10, #0x1f @ domains 0, 1 = manager mcr p15, 0, r10, c3, c0, 0 @ load domain access register From 8553cb67d2318db327071018fc81084cbabccc46 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 10 Nov 2008 14:14:11 +0000 Subject: [PATCH 8/9] Modern processors may need to drain the WB before WFI Since WFI may cause the processor to enter a low-power mode, data may still be in the write buffer. This patch adds a DSB (or DWB) to the cpu_(v6|v7)_do_idle functions before the WFI. Signed-off-by: Catalin Marinas --- arch/arm/mm/proc-v6.S | 2 ++ arch/arm/mm/proc-v7.S | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 294943b85973..f0cc599facb7 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -71,6 +71,8 @@ ENTRY(cpu_v6_reset) * IRQs are already disabled. */ ENTRY(cpu_v6_do_idle) + mov r1, #0 + mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt mov pc, lr diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 721b7d53bfd8..0e11d9716a7d 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -63,6 +63,7 @@ ENDPROC(cpu_v7_reset) * IRQs are already disabled. */ ENTRY(cpu_v7_do_idle) + dsb @ WFI may enter a low-power mode wfi mov pc, lr ENDPROC(cpu_v7_do_idle) From 7f1fd31db158c95418d9cc5690ab60ecc6fb632d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 10 Nov 2008 14:14:11 +0000 Subject: [PATCH 9/9] Fix the teehbr_read function prototype A "void" was missing inside brackets. Signed-off-by: Catalin Marinas --- arch/arm/kernel/thumbee.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c index df3f6b7ebcea..9cb7aaca159f 100644 --- a/arch/arm/kernel/thumbee.c +++ b/arch/arm/kernel/thumbee.c @@ -25,7 +25,7 @@ /* * Access to the ThumbEE Handler Base register */ -static inline unsigned long teehbr_read() +static inline unsigned long teehbr_read(void) { unsigned long v; asm("mrc p14, 6, %0, c1, c0, 0\n" : "=r" (v));