[ARM] 4584/2: ARMv7: Add Advanced SIMD (NEON) extension support
This patch enables the use of the Advanced SIMD (NEON) extension on ARMv7. The NEON technology is a 64/128-bit hybrid SIMD architecture for accelerating the performance of multimedia and signal processing applications. The extension shares its registers with the VFP unit, and enabling/disabling and saving/restoring follow the same rules as for VFP. In addition, some NEON instructions do not have the appropriate CP number encoded, so they are detected by explicit opcode checks in the call_fpe function. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
parent
25ebee020b
commit
b5872db4a2
|
@ -966,6 +966,13 @@ config VFPv3
|
||||||
depends on VFP
|
depends on VFP
|
||||||
default y if CPU_V7
|
default y if CPU_V7
|
||||||
|
|
||||||
|
config NEON
|
||||||
|
bool "Advanced SIMD (NEON) Extension support"
|
||||||
|
depends on VFPv3 && CPU_V7
|
||||||
|
help
|
||||||
|
Say Y to include support code for NEON, the ARMv7 Advanced SIMD
|
||||||
|
Extension.
|
||||||
|
|
||||||
endmenu
|
endmenu
|
||||||
|
|
||||||
menu "Userspace binary formats"
|
menu "Userspace binary formats"
|
||||||
|
|
|
@ -480,6 +480,13 @@ __und_usr:
|
||||||
* co-processor instructions. However, we have to watch out
|
* co-processor instructions. However, we have to watch out
|
||||||
* for the ARM6/ARM7 SWI bug.
|
* for the ARM6/ARM7 SWI bug.
|
||||||
*
|
*
|
||||||
|
* NEON is a special case that has to be handled here. Not all
|
||||||
|
* NEON instructions are co-processor instructions, so we have
|
||||||
|
* to make a special case of checking for them. Plus, there are
|
||||||
|
* five groups of them, so we have a table of mask/opcode pairs
|
||||||
|
* to check against, and if any match then we branch off into the
|
||||||
|
* NEON handler code.
|
||||||
|
*
|
||||||
* Emulators may wish to make use of the following registers:
|
* Emulators may wish to make use of the following registers:
|
||||||
* r0 = instruction opcode.
|
* r0 = instruction opcode.
|
||||||
* r2 = PC+4
|
* r2 = PC+4
|
||||||
|
@ -488,6 +495,23 @@ __und_usr:
|
||||||
* lr = unrecognised instruction return address
|
* lr = unrecognised instruction return address
|
||||||
*/
|
*/
|
||||||
call_fpe:
|
call_fpe:
|
||||||
|
#ifdef CONFIG_NEON
|
||||||
|
adr r6, .LCneon_opcodes
|
||||||
|
2:
|
||||||
|
ldr r7, [r6], #4 @ mask value
|
||||||
|
cmp r7, #0 @ end mask?
|
||||||
|
beq 1f
|
||||||
|
and r8, r0, r7
|
||||||
|
ldr r7, [r6], #4 @ opcode bits matching in mask
|
||||||
|
cmp r8, r7 @ NEON instruction?
|
||||||
|
bne 2b
|
||||||
|
get_thread_info r10
|
||||||
|
mov r7, #1
|
||||||
|
strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used
|
||||||
|
strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used
|
||||||
|
b do_vfp @ let VFP handler handle this
|
||||||
|
1:
|
||||||
|
#endif
|
||||||
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
|
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
|
||||||
#if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
|
#if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
|
||||||
and r8, r0, #0x0f000000 @ mask out op-code bits
|
and r8, r0, #0x0f000000 @ mask out op-code bits
|
||||||
|
@ -537,6 +561,20 @@ call_fpe:
|
||||||
mov pc, lr @ CP#14 (Debug)
|
mov pc, lr @ CP#14 (Debug)
|
||||||
mov pc, lr @ CP#15 (Control)
|
mov pc, lr @ CP#15 (Control)
|
||||||
|
|
||||||
|
#ifdef CONFIG_NEON
|
||||||
|
.align 6
|
||||||
|
|
||||||
|
.LCneon_opcodes:
|
||||||
|
.word 0xfe000000 @ mask
|
||||||
|
.word 0xf2000000 @ opcode
|
||||||
|
|
||||||
|
.word 0xff100000 @ mask
|
||||||
|
.word 0xf4000000 @ opcode
|
||||||
|
|
||||||
|
.word 0x00000000 @ mask
|
||||||
|
.word 0x00000000 @ opcode
|
||||||
|
#endif
|
||||||
|
|
||||||
do_fpe:
|
do_fpe:
|
||||||
enable_irq
|
enable_irq
|
||||||
ldr r4, .LCfp
|
ldr r4, .LCfp
|
||||||
|
|
Loading…
Reference in New Issue