[ARM] 4584/2: ARMv7: Add Advanced SIMD (NEON) extension support

This patch enables the use of the Advanced SIMD (NEON) extension on
ARMv7. The NEON technology is a 64/128-bit hybrid SIMD architecture
for accelerating the performance of multimedia and signal processing
applications. The extension shares the registers with the VFP unit and
enabling/disabling and saving/restoring follow the same rules. In
addition, some NEON instructions do not have the appropriate CP number
encoded, so they are checked for explicitly in the call_fpe function.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
Catalin Marinas 2008-01-10 19:16:17 +01:00 committed by Russell King
parent 25ebee020b
commit b5872db4a2
2 changed files with 45 additions and 0 deletions

View File

@ -966,6 +966,13 @@ config VFPv3
depends on VFP depends on VFP
default y if CPU_V7 default y if CPU_V7
# NEON shares its registers with the VFP unit and is enabled for ARMv7
# only, hence the VFPv3 && CPU_V7 dependency below.
config NEON
bool "Advanced SIMD (NEON) Extension support"
depends on VFPv3 && CPU_V7
help
Say Y to include support code for NEON, the ARMv7 Advanced SIMD
Extension.
endmenu endmenu
menu "Userspace binary formats" menu "Userspace binary formats"

View File

@ -480,6 +480,13 @@ __und_usr:
* co-processor instructions. However, we have to watch out * co-processor instructions. However, we have to watch out
* for the ARM6/ARM7 SWI bug. * for the ARM6/ARM7 SWI bug.
* *
* NEON is a special case that has to be handled here. Not all
* NEON instructions are co-processor instructions, so we have
* to check for them explicitly. Because they fall into several
* encoding groups, we keep a zero-terminated table of mask/opcode
* pairs to check against, and if any pair matches we branch off
* into the NEON handler code.
*
* Emulators may wish to make use of the following registers: * Emulators may wish to make use of the following registers:
* r0 = instruction opcode. * r0 = instruction opcode.
* r2 = PC+4 * r2 = PC+4
@ -488,6 +495,23 @@ __und_usr:
* lr = unrecognised instruction return address * lr = unrecognised instruction return address
*/ */
call_fpe: call_fpe:
#ifdef CONFIG_NEON
@
@ NEON pre-check. Walk the table of mask/opcode word pairs at
@ .LCneon_opcodes and test each pair against the faulting opcode in r0:
@ an entry matches when (r0 & mask) == opcode. A zero mask marks the end
@ of the table.
@
@ On a match, the used-coprocessor bytes for CP#10 and CP#11 are set to 1
@ and control is handed to do_vfp. With no match, execution falls through
@ to local label 1 and the bit-27 co-processor test that follows.
@
@ Registers written here: r6 (table cursor), r7, r8, and r10 on a match.
@
adr r6, .LCneon_opcodes @ r6 = address of first table entry
2:
ldr r7, [r6], #4 @ mask value (post-increment: r6 now points at the opcode word)
cmp r7, #0 @ end mask?
beq 1f @ table exhausted, not a NEON instruction
and r8, r0, r7 @ r8 = opcode bits selected by the mask
ldr r7, [r6], #4 @ opcode bits matching in mask (r6 now points at the next pair)
cmp r8, r7 @ NEON instruction?
bne 2b @ no match, try the next table entry
get_thread_info r10 @ macro defined elsewhere; r10 is used below as the base for TI_USED_CP
mov r7, #1
strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used
strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used
b do_vfp @ let VFP handler handle this
1:
#endif
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
#if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710) #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
and r8, r0, #0x0f000000 @ mask out op-code bits and r8, r0, #0x0f000000 @ mask out op-code bits
@ -537,6 +561,20 @@ call_fpe:
mov pc, lr @ CP#14 (Debug) mov pc, lr @ CP#14 (Debug)
mov pc, lr @ CP#15 (Control) mov pc, lr @ CP#15 (Control)
#ifdef CONFIG_NEON
@
@ Table of { mask, opcode } word pairs scanned by the NEON check at the
@ top of call_fpe. An instruction matches an entry when
@ (instruction & mask) == opcode. The scan stops at the first pair whose
@ mask word is zero, so the terminator pair must stay last.
@
@ NOTE(review): the instruction-class names given below are inferred from
@ the bit patterns; confirm against the ARMv7-A encoding tables.
@
.align 6
.LCneon_opcodes:
.word 0xfe000000 @ mask: compare bits [31:25]
.word 0xf2000000 @ opcode: bits [31:25] = 1111001 (presumably NEON data processing)
.word 0xff100000 @ mask: compare bits [31:24] and bit 20
.word 0xf4000000 @ opcode: bits [31:24] = 11110100, bit 20 = 0 (presumably NEON element/structure load/store)
.word 0x00000000 @ mask: zero marks the end of the table
.word 0x00000000 @ opcode: completes the terminator pair; the scan exits before reading it
#endif
do_fpe: do_fpe:
enable_irq enable_irq
ldr r4, .LCfp ldr r4, .LCfp