arm64: add support for kernel mode NEON in interrupt context
This patch modifies kernel_neon_begin() and kernel_neon_end(), so they may be called from any context. To address the case where only a couple of registers are needed, kernel_neon_begin_partial(u32) is introduced which takes as a parameter the number of bottom 'n' NEON q-registers required. To mark the end of such a partial section, the regular kernel_neon_end() should be used. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
This commit is contained in:
parent
005f78cd88
commit
190f1ca85d
|
@ -41,6 +41,17 @@ struct fpsimd_state {
|
||||||
unsigned int cpu;
|
unsigned int cpu;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Struct for stacking the bottom 'n' FP/SIMD registers, together with the saved FPSR/FPCR and the register count.
|
||||||
|
*/
|
||||||
|
struct fpsimd_partial_state {
|
||||||
|
u32 fpsr; /* saved FPSR; the assembly save/restore macros access it at offset 0 */
|
||||||
|
u32 fpcr; /* saved FPCR; accessed by the macros at offset 4 */
|
||||||
|
u32 num_regs; /* number of registers stacked; written at offset 8 on save, read back on restore */
|
||||||
|
__uint128_t vregs[32]; /* room for up to 32 q-registers of 128 bits each */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
||||||
/* Masks for extracting the FPSR and FPCR from the FPSCR */
|
/* Masks for extracting the FPSR and FPCR from the FPSCR */
|
||||||
#define VFP_FPSCR_STAT_MASK 0xf800009f
|
#define VFP_FPSCR_STAT_MASK 0xf800009f
|
||||||
|
@ -66,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
|
||||||
|
|
||||||
extern void fpsimd_flush_task_state(struct task_struct *target);
|
extern void fpsimd_flush_task_state(struct task_struct *target);
|
||||||
|
|
||||||
|
extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
|
||||||
|
u32 num_regs);
|
||||||
|
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -62,3 +62,38 @@
|
||||||
ldr w\tmpnr, [\state, #16 * 2 + 4]
|
ldr w\tmpnr, [\state, #16 * 2 + 4]
|
||||||
msr fpcr, x\tmpnr
|
msr fpcr, x\tmpnr
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
.altmacro
|
||||||
|
.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 /* state: base register; numnr, tmpnr1, tmpnr2: register numbers used as x/w registers */
|
||||||
|
mrs x\tmpnr1, fpsr /* read FPSR into the first scratch register */
|
||||||
|
str w\numnr, [\state, #8] /* record the register count at offset 8 of the state */
|
||||||
|
mrs x\tmpnr2, fpcr /* read FPCR into the second scratch register */
|
||||||
|
stp w\tmpnr1, w\tmpnr2, [\state] /* store FPSR at offset 0 and FPCR at offset 4 */
|
||||||
|
adr x\tmpnr1, 0f /* address of label 0, just past the stp sequence */
|
||||||
|
add \state, \state, x\numnr, lsl #4 /* advance state by 16 bytes per register; it is not restored afterwards */
|
||||||
|
sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 /* step back 2 bytes per register, i.e. one 4-byte stp per register pair */
|
||||||
|
br x\tmpnr1 /* enter the sequence so that only the stp instructions for the bottom registers run */
|
||||||
|
.irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 /* one stp per register pair, highest pair first */
|
||||||
|
.irp qb, %(qa + 1)
|
||||||
|
stp q\qa, q\qb, [\state, # -16 * \qa - 16]
|
||||||
|
.endr
|
||||||
|
.endr
|
||||||
|
0: /* end of the stp sequence; also the branch target when the count is zero */
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro fpsimd_restore_partial state, tmpnr1, tmpnr2 /* state: base register; tmpnr1, tmpnr2: scratch register numbers used as x/w registers */
|
||||||
|
ldp w\tmpnr1, w\tmpnr2, [\state] /* load the saved FPSR (offset 0) and FPCR (offset 4) */
|
||||||
|
msr fpsr, x\tmpnr1
|
||||||
|
msr fpcr, x\tmpnr2
|
||||||
|
adr x\tmpnr1, 0f /* address of label 0, just past the ldp sequence */
|
||||||
|
ldr w\tmpnr2, [\state, #8] /* register count recorded at offset 8 of the state */
|
||||||
|
add \state, \state, x\tmpnr2, lsl #4 /* advance state by 16 bytes per register; it is not restored afterwards */
|
||||||
|
sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 /* step back 2 bytes per register, i.e. one 4-byte ldp per register pair */
|
||||||
|
br x\tmpnr1 /* enter the sequence so that only the ldp instructions for the saved registers run */
|
||||||
|
.irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 /* one ldp per register pair, highest pair first */
|
||||||
|
.irp qb, %(qa + 1)
|
||||||
|
ldp q\qa, q\qb, [\state, # -16 * \qa - 16]
|
||||||
|
.endr
|
||||||
|
.endr
|
||||||
|
0: /* end of the ldp sequence; also the branch target when the count is zero */
|
||||||
|
.endm
|
||||||
|
|
|
@ -8,7 +8,11 @@
|
||||||
* published by the Free Software Foundation.
|
* published by the Free Software Foundation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
#define cpu_has_neon() (1)
|
#define cpu_has_neon() (1)
|
||||||
|
|
||||||
void kernel_neon_begin(void);
|
#define kernel_neon_begin() kernel_neon_begin_partial(32)
|
||||||
|
|
||||||
|
void kernel_neon_begin_partial(u32 num_regs);
|
||||||
void kernel_neon_end(void);
|
void kernel_neon_end(void);
|
||||||
|
|
|
@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
|
||||||
fpsimd_restore x0, 8
|
fpsimd_restore x0, 8
|
||||||
ret
|
ret
|
||||||
ENDPROC(fpsimd_load_state)
|
ENDPROC(fpsimd_load_state)
|
||||||
|
|
||||||
|
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Save the bottom n FP registers.
|
||||||
|
*
|
||||||
|
* x0 - pointer to struct fpsimd_partial_state
|
||||||
|
* x1 - number of registers to save (recorded in the state for the later load)
|
||||||
|
*
|
||||||
|
* Uses x8 and x9 as scratch registers; x0 is modified by the macro.
|
||||||
|
*/
|
||||||
|
ENTRY(fpsimd_save_partial_state)
|
||||||
|
fpsimd_save_partial x0, 1, 8, 9
|
||||||
|
ret
|
||||||
|
ENDPROC(fpsimd_save_partial_state)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Load the bottom n FP registers, where n is the register count recorded in the state when it was saved.
|
||||||
|
*
|
||||||
|
* x0 - pointer to struct fpsimd_partial_state (x0 is modified by the macro)
|
||||||
|
*/
|
||||||
|
ENTRY(fpsimd_load_partial_state)
|
||||||
|
fpsimd_restore_partial x0, 8, 9 /* x8 and x9 are used as scratch registers */
|
||||||
|
ret
|
||||||
|
ENDPROC(fpsimd_load_partial_state)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -218,29 +218,45 @@ void fpsimd_flush_task_state(struct task_struct *t)
|
||||||
|
|
||||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||||
|
|
||||||
|
static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
|
||||||
|
static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Kernel-side NEON support functions
|
* Kernel-side NEON support functions
|
||||||
*/
|
*/
|
||||||
void kernel_neon_begin(void)
|
void kernel_neon_begin_partial(u32 num_regs)
|
||||||
{
|
{
|
||||||
/* Avoid using the NEON in interrupt context */
|
if (in_interrupt()) { /* interrupt context: stack the requested registers in a per-CPU buffer */
|
||||||
BUG_ON(in_interrupt());
|
struct fpsimd_partial_state *s = this_cpu_ptr(
|
||||||
preempt_disable();
|
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
|
||||||
|
|
||||||
/*
|
BUG_ON(num_regs > 32); /* the per-CPU buffer holds at most 32 registers */
|
||||||
* Save the userland FPSIMD state if we have one and if we haven't done
|
fpsimd_save_partial_state(s, roundup(num_regs, 2)); /* registers are saved in pairs, so round up to an even count */
|
||||||
* so already. Clear fpsimd_last_state to indicate that there is no
|
} else {
|
||||||
* longer userland FPSIMD state in the registers.
|
/*
|
||||||
*/
|
* Save the userland FPSIMD state if we have one and if we
|
||||||
if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
|
* haven't done so already. Clear fpsimd_last_state to indicate
|
||||||
fpsimd_save_state(&current->thread.fpsimd_state);
|
* that there is no longer userland FPSIMD state in the
|
||||||
this_cpu_write(fpsimd_last_state, NULL);
|
* registers.
|
||||||
|
*/
|
||||||
|
preempt_disable(); /* paired with preempt_enable() in kernel_neon_end() */
|
||||||
|
if (current->mm &&
|
||||||
|
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||||
|
fpsimd_save_state(&current->thread.fpsimd_state);
|
||||||
|
this_cpu_write(fpsimd_last_state, NULL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(kernel_neon_begin);
|
EXPORT_SYMBOL(kernel_neon_begin_partial);
|
||||||
|
|
||||||
void kernel_neon_end(void)
|
void kernel_neon_end(void)
|
||||||
{
|
{
|
||||||
preempt_enable();
|
if (in_interrupt()) { /* interrupt context: undo the partial save done by kernel_neon_begin_partial() */
|
||||||
|
struct fpsimd_partial_state *s = this_cpu_ptr(
|
||||||
|
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); /* same hardirq/softirq buffer choice as in kernel_neon_begin_partial() */
|
||||||
|
fpsimd_load_partial_state(s); /* reloads FPSR/FPCR and the registers recorded in the buffer */
|
||||||
|
} else {
|
||||||
|
preempt_enable(); /* matches preempt_disable() in kernel_neon_begin_partial() */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(kernel_neon_end);
|
EXPORT_SYMBOL(kernel_neon_end);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue