Merge branch arm64/for-next/sme2 into kvmarm/next
Merge the SME2 branch to fix up a rather annoying conflict due to the EL2 finalization refactor. Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
This commit is contained in:
commit
619cec0085
|
@ -369,6 +369,16 @@ Before jumping into the kernel, the following conditions must be met:
|
|||
|
||||
- HCR_EL2.ATA (bit 56) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Scalable Matrix Extension version 2 (FEAT_SME2):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- SMCR_EL3.EZT0 (bit 30) must be initialised to 0b1.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
|
||||
|
||||
The requirements described above for CPU mode, caches, MMUs, architected
|
||||
timers, coherency and system registers apply to all CPUs. All CPUs must
|
||||
enter the kernel in the same exception level. Where the values documented
|
||||
|
|
|
@ -284,6 +284,24 @@ HWCAP2_RPRFM
|
|||
HWCAP2_SVE2P1
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
|
||||
|
||||
HWCAP2_SME2
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
|
||||
|
||||
HWCAP2_SME2P1
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0010.
|
||||
|
||||
HWCAP2_SMEI16I32
|
||||
Functionality implied by ID_AA64SMFR0_EL1.I16I32 == 0b0101
|
||||
|
||||
HWCAP2_SMEBI32I32
|
||||
Functionality implied by ID_AA64SMFR0_EL1.BI32I32 == 0b1
|
||||
|
||||
HWCAP2_SMEB16B16
|
||||
Functionality implied by ID_AA64SMFR0_EL1.B16B16 == 0b1
|
||||
|
||||
HWCAP2_SMEF16F16
|
||||
Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
|
|
|
@ -18,14 +18,19 @@ model features for SME is included in Appendix A.
|
|||
1. General
|
||||
-----------
|
||||
|
||||
* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA
|
||||
register state and TPIDR2_EL0 are tracked per thread.
|
||||
* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA and (when
|
||||
present) ZTn register state and TPIDR2_EL0 are tracked per thread.
|
||||
|
||||
* The presence of SME is reported to userspace via HWCAP2_SME in the aux vector
|
||||
AT_HWCAP2 entry. Presence of this flag implies the presence of the SME
|
||||
instructions and registers, and the Linux-specific system interfaces
|
||||
described in this document. SME is reported in /proc/cpuinfo as "sme".
|
||||
|
||||
* The presence of SME2 is reported to userspace via HWCAP2_SME2 in the
|
||||
aux vector AT_HWCAP2 entry. Presence of this flag implies the presence of
|
||||
the SME2 instructions and ZT0, and the Linux-specific system interfaces
|
||||
described in this document. SME2 is reported in /proc/cpuinfo as "sme2".
|
||||
|
||||
* Support for the execution of SME instructions in userspace can also be
|
||||
detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS
|
||||
instruction, and checking that the value of the SME field is nonzero. [3]
|
||||
|
@ -44,6 +49,7 @@ model features for SME is included in Appendix A.
|
|||
HWCAP2_SME_B16F32
|
||||
HWCAP2_SME_F32F32
|
||||
HWCAP2_SME_FA64
|
||||
HWCAP2_SME2
|
||||
|
||||
This list may be extended over time as the SME architecture evolves.
|
||||
|
||||
|
@ -52,8 +58,8 @@ model features for SME is included in Appendix A.
|
|||
cpu-feature-registers.txt for details.
|
||||
|
||||
* Debuggers should restrict themselves to interacting with the target via the
|
||||
NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets. The recommended way
|
||||
of detecting support for these regsets is to connect to a target process
|
||||
NT_ARM_SVE, NT_ARM_SSVE, NT_ARM_ZA and NT_ARM_ZT regsets. The recommended
|
||||
way of detecting support for these regsets is to connect to a target process
|
||||
first and then attempt a
|
||||
|
||||
ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov).
|
||||
|
@ -89,13 +95,13 @@ be zeroed.
|
|||
-------------------------
|
||||
|
||||
* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
|
||||
ZA matrix are preserved.
|
||||
ZA matrix and ZTn (if present) are preserved.
|
||||
|
||||
* On syscall PSTATE.SM will be cleared and the SVE registers will be handled
|
||||
as per the standard SVE ABI.
|
||||
|
||||
* Neither the SVE registers nor ZA are used to pass arguments to or receive
|
||||
results from any syscall.
|
||||
* None of the SVE registers, ZA or ZTn are used to pass arguments to
|
||||
or receive results from any syscall.
|
||||
|
||||
* On process creation (eg, clone()) the newly created process will have
|
||||
PSTATE.SM cleared.
|
||||
|
@ -134,6 +140,14 @@ be zeroed.
|
|||
__reserved[] referencing this space. za_context is then written in the
|
||||
extra space. Refer to [1] for further details about this mechanism.
|
||||
|
||||
* If ZTn is supported and PSTATE.ZA==1 then a signal frame record for ZTn will
|
||||
be generated.
|
||||
|
||||
* The signal record for ZTn has magic ZT_MAGIC (0x5a544e01) and consists of a
|
||||
standard signal frame header followed by a struct zt_context specifying
|
||||
the number of ZTn registers supported by the system, then zt_context.nregs
|
||||
blocks of 64 bytes of data per register.
|
||||
|
||||
|
||||
5. Signal return
|
||||
-----------------
|
||||
|
@ -151,6 +165,9 @@ When returning from a signal handler:
|
|||
the signal frame does not match the current vector length, the signal return
|
||||
attempt is treated as illegal, resulting in a forced SIGSEGV.
|
||||
|
||||
* If ZTn is not supported or PSTATE.ZA==0 then it is illegal to have a
|
||||
signal frame record for ZTn, resulting in a forced SIGSEGV.
|
||||
|
||||
|
||||
6. prctl extensions
|
||||
--------------------
|
||||
|
@ -214,8 +231,8 @@ prctl(PR_SME_SET_VL, unsigned long arg)
|
|||
vector length that will be applied at the next execve() by the calling
|
||||
thread.
|
||||
|
||||
* Changing the vector length causes all of ZA, P0..P15, FFR and all bits of
|
||||
Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
|
||||
* Changing the vector length causes all of ZA, ZTn, P0..P15, FFR and all
|
||||
bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
|
||||
unspecified, including both streaming and non-streaming SVE state.
|
||||
Calling PR_SME_SET_VL with vl equal to the thread's current vector
|
||||
length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
|
||||
|
@ -317,6 +334,15 @@ The regset data starts with struct user_za_header, containing:
|
|||
|
||||
* The effect of writing a partial, incomplete payload is unspecified.
|
||||
|
||||
* A new regset NT_ARM_ZT is defined for access to ZTn state via
|
||||
PTRACE_GETREGSET and PTRACE_SETREGSET.
|
||||
|
||||
* The NT_ARM_ZT regset consists of a single 512 bit register.
|
||||
|
||||
* When PSTATE.ZA==0 reads of NT_ARM_ZT will report all bits of ZTn as 0.
|
||||
|
||||
* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
|
||||
|
||||
|
||||
8. ELF coredump extensions
|
||||
---------------------------
|
||||
|
@ -331,6 +357,11 @@ The regset data starts with struct user_za_header, containing:
|
|||
been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
|
||||
when the coredump was generated.
|
||||
|
||||
* A NT_ARM_ZT note will be added to each coredump for each thread of the
|
||||
dumped process. The contents will be equivalent to the data that would have
|
||||
been read if a PTRACE_GETREGSET of NT_ARM_ZT were executed for each thread
|
||||
when the coredump was generated.
|
||||
|
||||
* The NT_ARM_TLS note will be extended to two registers, the second register
|
||||
will contain TPIDR2_EL0 on systems that support SME and will be read as
|
||||
zero with writes ignored otherwise.
|
||||
|
@ -406,6 +437,9 @@ In A64 state, SME adds the following:
|
|||
For best system performance it is strongly encouraged for software to enable
|
||||
ZA only when it is actively being used.
|
||||
|
||||
* A new ZT0 register is introduced when SME2 is present. This is a 512 bit
|
||||
register which is accessible when PSTATE.ZA is set, as ZA itself is.
|
||||
|
||||
* Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and
|
||||
SMSTOP instructions or by access to the SVCR system register:
|
||||
|
||||
|
|
|
@ -769,6 +769,12 @@ static __always_inline bool system_supports_sme(void)
|
|||
cpus_have_const_cap(ARM64_SME);
|
||||
}
|
||||
|
||||
static __always_inline bool system_supports_sme2(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARM64_SME) &&
|
||||
cpus_have_const_cap(ARM64_SME2);
|
||||
}
|
||||
|
||||
static __always_inline bool system_supports_fa64(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARM64_SME) &&
|
||||
|
|
|
@ -350,6 +350,7 @@
|
|||
#define ESR_ELx_SME_ISS_ILL 1
|
||||
#define ESR_ELx_SME_ISS_SM_DISABLED 2
|
||||
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
|
||||
#define ESR_ELx_SME_ISS_ZT_DISABLED 4
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <asm/types.h>
|
||||
|
|
|
@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
|
|||
struct cpu_fp_state {
|
||||
struct user_fpsimd_state *st;
|
||||
void *sve_state;
|
||||
void *za_state;
|
||||
void *sme_state;
|
||||
u64 *svcr;
|
||||
unsigned int sve_vl;
|
||||
unsigned int sme_vl;
|
||||
|
@ -105,6 +105,13 @@ static inline void *sve_pffr(struct thread_struct *thread)
|
|||
return (char *)thread->sve_state + sve_ffr_offset(vl);
|
||||
}
|
||||
|
||||
static inline void *thread_zt_state(struct thread_struct *thread)
|
||||
{
|
||||
/* The ZT register state is stored immediately after the ZA state */
|
||||
unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
|
||||
return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
|
||||
}
|
||||
|
||||
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
|
||||
extern void sve_load_state(void const *state, u32 const *pfpsr,
|
||||
int restore_ffr);
|
||||
|
@ -112,12 +119,13 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
|
|||
extern unsigned int sve_get_vl(void);
|
||||
extern void sve_set_vq(unsigned long vq_minus_1);
|
||||
extern void sme_set_vq(unsigned long vq_minus_1);
|
||||
extern void za_save_state(void *state);
|
||||
extern void za_load_state(void const *state);
|
||||
extern void sme_save_state(void *state, int zt);
|
||||
extern void sme_load_state(void const *state, int zt);
|
||||
|
||||
struct arm64_cpu_capabilities;
|
||||
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
|
||||
extern u64 read_zcr_features(void);
|
||||
|
@ -355,14 +363,20 @@ extern int sme_get_current_vl(void);
|
|||
|
||||
/*
|
||||
* Return how many bytes of memory are required to store the full SME
|
||||
* specific state (currently just ZA) for task, given task's currently
|
||||
* configured vector length.
|
||||
* specific state for task, given task's currently configured vector
|
||||
* length.
|
||||
*/
|
||||
static inline size_t za_state_size(struct task_struct const *task)
|
||||
static inline size_t sme_state_size(struct task_struct const *task)
|
||||
{
|
||||
unsigned int vl = task_get_sme_vl(task);
|
||||
size_t size;
|
||||
|
||||
return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
|
||||
size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
|
||||
|
||||
if (system_supports_sme2())
|
||||
size += ZT_SIG_REG_SIZE;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -382,7 +396,7 @@ static inline int sme_max_virtualisable_vl(void) { return 0; }
|
|||
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
|
||||
static inline int sme_get_current_vl(void) { return -EINVAL; }
|
||||
|
||||
static inline size_t za_state_size(struct task_struct const *task)
|
||||
static inline size_t sme_state_size(struct task_struct const *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -220,6 +220,28 @@
|
|||
| ((\offset) & 7)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* LDR (ZT0)
|
||||
*
|
||||
* LDR ZT0, nx
|
||||
*/
|
||||
.macro _ldr_zt nx
|
||||
_check_general_reg \nx
|
||||
.inst 0xe11f8000 \
|
||||
| (\nx << 5)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* STR (ZT0)
|
||||
*
|
||||
* STR ZT0, nx
|
||||
*/
|
||||
.macro _str_zt nx
|
||||
_check_general_reg \nx
|
||||
.inst 0xe13f8000 \
|
||||
| (\nx << 5)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Zero the entire ZA array
|
||||
* ZERO ZA
|
||||
|
|
|
@ -123,6 +123,12 @@
|
|||
#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC)
|
||||
#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM)
|
||||
#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1)
|
||||
#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2)
|
||||
#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1)
|
||||
#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32)
|
||||
#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
|
||||
#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
|
||||
#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
|
||||
|
||||
/*
|
||||
* This yields a mask that user programs can use to figure out what
|
||||
|
|
|
@ -161,7 +161,7 @@ struct thread_struct {
|
|||
enum fp_type fp_type; /* registers FPSIMD or SVE? */
|
||||
unsigned int fpsimd_cpu;
|
||||
void *sve_state; /* SVE registers, if any */
|
||||
void *za_state; /* ZA register, if any */
|
||||
void *sme_state; /* ZA and ZT state, if any */
|
||||
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
|
||||
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
|
||||
unsigned long fault_address; /* fault info */
|
||||
|
|
|
@ -96,5 +96,11 @@
|
|||
#define HWCAP2_CSSC (1UL << 34)
|
||||
#define HWCAP2_RPRFM (1UL << 35)
|
||||
#define HWCAP2_SVE2P1 (1UL << 36)
|
||||
#define HWCAP2_SME2 (1UL << 37)
|
||||
#define HWCAP2_SME2P1 (1UL << 38)
|
||||
#define HWCAP2_SME_I16I32 (1UL << 39)
|
||||
#define HWCAP2_SME_BI32I32 (1UL << 40)
|
||||
#define HWCAP2_SME_B16B16 (1UL << 41)
|
||||
#define HWCAP2_SME_F16F16 (1UL << 42)
|
||||
|
||||
#endif /* _UAPI__ASM_HWCAP_H */
|
||||
|
|
|
@ -152,6 +152,14 @@ struct za_context {
|
|||
__u16 __reserved[3];
|
||||
};
|
||||
|
||||
#define ZT_MAGIC 0x5a544e01
|
||||
|
||||
struct zt_context {
|
||||
struct _aarch64_ctx head;
|
||||
__u16 nregs;
|
||||
__u16 __reserved[3];
|
||||
};
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#include <asm/sve_context.h>
|
||||
|
@ -304,4 +312,15 @@ struct za_context {
|
|||
#define ZA_SIG_CONTEXT_SIZE(vq) \
|
||||
(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
|
||||
|
||||
#define ZT_SIG_REG_SIZE 512
|
||||
|
||||
#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8)
|
||||
|
||||
#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
|
||||
|
||||
#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
|
||||
|
||||
#define ZT_SIG_CONTEXT_SIZE(n) \
|
||||
(sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
|
||||
|
||||
#endif /* _UAPI__ASM_SIGCONTEXT_H */
|
||||
|
|
|
@ -282,16 +282,26 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
|
|||
static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
|
||||
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
|
||||
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
|
||||
ARM64_FTR_END,
|
||||
|
@ -2649,6 +2659,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
|||
.matches = has_cpuid_feature,
|
||||
.cpu_enable = fa64_kernel_enable,
|
||||
},
|
||||
{
|
||||
.desc = "SME2",
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.capability = ARM64_SME2,
|
||||
.sys_reg = SYS_ID_AA64PFR1_EL1,
|
||||
.sign = FTR_UNSIGNED,
|
||||
.field_pos = ID_AA64PFR1_EL1_SME_SHIFT,
|
||||
.field_width = ID_AA64PFR1_EL1_SME_WIDTH,
|
||||
.min_field_value = ID_AA64PFR1_EL1_SME_SME2,
|
||||
.matches = has_cpuid_feature,
|
||||
.cpu_enable = sme2_kernel_enable,
|
||||
},
|
||||
#endif /* CONFIG_ARM64_SME */
|
||||
{
|
||||
.desc = "WFx with timeout",
|
||||
|
@ -2827,11 +2849,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
|
|||
#ifdef CONFIG_ARM64_SME
|
||||
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16B16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_BI32I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
|
||||
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
|
||||
#endif /* CONFIG_ARM64_SME */
|
||||
{},
|
||||
|
|
|
@ -119,6 +119,12 @@ static const char *const hwcap_str[] = {
|
|||
[KERNEL_HWCAP_CSSC] = "cssc",
|
||||
[KERNEL_HWCAP_RPRFM] = "rprfm",
|
||||
[KERNEL_HWCAP_SVE2P1] = "sve2p1",
|
||||
[KERNEL_HWCAP_SME2] = "sme2",
|
||||
[KERNEL_HWCAP_SME2P1] = "sme2p1",
|
||||
[KERNEL_HWCAP_SME_I16I32] = "smei16i32",
|
||||
[KERNEL_HWCAP_SME_BI32I32] = "smebi32i32",
|
||||
[KERNEL_HWCAP_SME_B16B16] = "smeb16b16",
|
||||
[KERNEL_HWCAP_SME_F16F16] = "smef16f16",
|
||||
};
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
|
|
@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq)
|
|||
SYM_FUNC_END(sme_set_vq)
|
||||
|
||||
/*
|
||||
* Save the SME state
|
||||
* Save the ZA and ZT state
|
||||
*
|
||||
* x0 - pointer to buffer for state
|
||||
* x1 - number of ZT registers to save
|
||||
*/
|
||||
SYM_FUNC_START(za_save_state)
|
||||
_sme_rdsvl 1, 1 // x1 = VL/8
|
||||
sme_save_za 0, x1, 12
|
||||
SYM_FUNC_START(sme_save_state)
|
||||
_sme_rdsvl 2, 1 // x2 = VL/8
|
||||
sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
|
||||
|
||||
cbz x1, 1f
|
||||
_str_zt 0
|
||||
1:
|
||||
ret
|
||||
SYM_FUNC_END(za_save_state)
|
||||
SYM_FUNC_END(sme_save_state)
|
||||
|
||||
/*
|
||||
* Load the SME state
|
||||
* Load the ZA and ZT state
|
||||
*
|
||||
* x0 - pointer to buffer for state
|
||||
* x1 - number of ZT registers to save
|
||||
*/
|
||||
SYM_FUNC_START(za_load_state)
|
||||
_sme_rdsvl 1, 1 // x1 = VL/8
|
||||
sme_load_za 0, x1, 12
|
||||
SYM_FUNC_START(sme_load_state)
|
||||
_sme_rdsvl 2, 1 // x2 = VL/8
|
||||
sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
|
||||
|
||||
cbz x1, 1f
|
||||
_ldr_zt 0
|
||||
1:
|
||||
ret
|
||||
SYM_FUNC_END(za_load_state)
|
||||
SYM_FUNC_END(sme_load_state)
|
||||
|
||||
#endif /* CONFIG_ARM64_SME */
|
||||
|
|
|
@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
|
|||
/*
|
||||
* TIF_SME controls whether a task can use SME without trapping while
|
||||
* in userspace, when TIF_SME is set then we must have storage
|
||||
* alocated in sve_state and za_state to store the contents of both ZA
|
||||
* alocated in sve_state and sme_state to store the contents of both ZA
|
||||
* and the SVE registers for both streaming and non-streaming modes.
|
||||
*
|
||||
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
|
||||
|
@ -429,7 +429,8 @@ static void task_fpsimd_load(void)
|
|||
write_sysreg_s(current->thread.svcr, SYS_SVCR);
|
||||
|
||||
if (thread_za_enabled(¤t->thread))
|
||||
za_load_state(current->thread.za_state);
|
||||
sme_load_state(current->thread.sme_state,
|
||||
system_supports_sme2());
|
||||
|
||||
if (thread_sm_enabled(¤t->thread))
|
||||
restore_ffr = system_supports_fa64();
|
||||
|
@ -490,7 +491,8 @@ static void fpsimd_save(void)
|
|||
*svcr = read_sysreg_s(SYS_SVCR);
|
||||
|
||||
if (*svcr & SVCR_ZA_MASK)
|
||||
za_save_state(last->za_state);
|
||||
sme_save_state(last->sme_state,
|
||||
system_supports_sme2());
|
||||
|
||||
/* If we are in streaming mode override regular SVE. */
|
||||
if (*svcr & SVCR_SM_MASK) {
|
||||
|
@ -1257,30 +1259,30 @@ void fpsimd_release_task(struct task_struct *dead_task)
|
|||
#ifdef CONFIG_ARM64_SME
|
||||
|
||||
/*
|
||||
* Ensure that task->thread.za_state is allocated and sufficiently large.
|
||||
* Ensure that task->thread.sme_state is allocated and sufficiently large.
|
||||
*
|
||||
* This function should be used only in preparation for replacing
|
||||
* task->thread.za_state with new data. The memory is always zeroed
|
||||
* task->thread.sme_state with new data. The memory is always zeroed
|
||||
* here to prevent stale data from showing through: this is done in
|
||||
* the interest of testability and predictability, the architecture
|
||||
* guarantees that when ZA is enabled it will be zeroed.
|
||||
*/
|
||||
void sme_alloc(struct task_struct *task)
|
||||
{
|
||||
if (task->thread.za_state) {
|
||||
memset(task->thread.za_state, 0, za_state_size(task));
|
||||
if (task->thread.sme_state) {
|
||||
memset(task->thread.sme_state, 0, sme_state_size(task));
|
||||
return;
|
||||
}
|
||||
|
||||
/* This could potentially be up to 64K. */
|
||||
task->thread.za_state =
|
||||
kzalloc(za_state_size(task), GFP_KERNEL);
|
||||
task->thread.sme_state =
|
||||
kzalloc(sme_state_size(task), GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void sme_free(struct task_struct *task)
|
||||
{
|
||||
kfree(task->thread.za_state);
|
||||
task->thread.za_state = NULL;
|
||||
kfree(task->thread.sme_state);
|
||||
task->thread.sme_state = NULL;
|
||||
}
|
||||
|
||||
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
|
||||
|
@ -1298,6 +1300,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
|
|||
isb();
|
||||
}
|
||||
|
||||
/*
|
||||
* This must be called after sme_kernel_enable(), we rely on the
|
||||
* feature table being sorted to ensure this.
|
||||
*/
|
||||
void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
|
||||
{
|
||||
/* Allow use of ZT0 */
|
||||
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
|
||||
SYS_SMCR_EL1);
|
||||
}
|
||||
|
||||
/*
|
||||
* This must be called after sme_kernel_enable(), we rely on the
|
||||
* feature table being sorted to ensure this.
|
||||
|
@ -1488,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
|
|||
|
||||
sve_alloc(current, false);
|
||||
sme_alloc(current);
|
||||
if (!current->thread.sve_state || !current->thread.za_state) {
|
||||
if (!current->thread.sve_state || !current->thread.sme_state) {
|
||||
force_sig(SIGKILL);
|
||||
return;
|
||||
}
|
||||
|
@ -1609,7 +1622,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
|
|||
void fpsimd_flush_thread(void)
|
||||
{
|
||||
void *sve_state = NULL;
|
||||
void *za_state = NULL;
|
||||
void *sme_state = NULL;
|
||||
|
||||
if (!system_supports_fpsimd())
|
||||
return;
|
||||
|
@ -1634,8 +1647,8 @@ void fpsimd_flush_thread(void)
|
|||
clear_thread_flag(TIF_SME);
|
||||
|
||||
/* Defer kfree() while in atomic context */
|
||||
za_state = current->thread.za_state;
|
||||
current->thread.za_state = NULL;
|
||||
sme_state = current->thread.sme_state;
|
||||
current->thread.sme_state = NULL;
|
||||
|
||||
fpsimd_flush_thread_vl(ARM64_VEC_SME);
|
||||
current->thread.svcr = 0;
|
||||
|
@ -1645,7 +1658,7 @@ void fpsimd_flush_thread(void)
|
|||
|
||||
put_cpu_fpsimd_context();
|
||||
kfree(sve_state);
|
||||
kfree(za_state);
|
||||
kfree(sme_state);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1711,7 +1724,7 @@ static void fpsimd_bind_task_to_cpu(void)
|
|||
WARN_ON(!system_supports_fpsimd());
|
||||
last->st = ¤t->thread.uw.fpsimd_state;
|
||||
last->sve_state = current->thread.sve_state;
|
||||
last->za_state = current->thread.za_state;
|
||||
last->sme_state = current->thread.sme_state;
|
||||
last->sve_vl = task_get_sve_vl(current);
|
||||
last->sme_vl = task_get_sme_vl(current);
|
||||
last->svcr = ¤t->thread.svcr;
|
||||
|
|
|
@ -132,6 +132,13 @@ SYM_CODE_START_LOCAL(__finalise_el2)
|
|||
orr x0, x0, SMCR_ELx_FA64_MASK
|
||||
.Lskip_sme_fa64:
|
||||
|
||||
// ZT0 available?
|
||||
mrs_s x1, SYS_ID_AA64SMFR0_EL1
|
||||
__check_override id_aa64smfr0 ID_AA64SMFR0_EL1_SMEver_SHIFT 4 .Linit_sme_zt0 .Lskip_sme_zt0
|
||||
.Linit_sme_zt0:
|
||||
orr x0, x0, SMCR_ELx_EZT0_MASK
|
||||
.Lskip_sme_zt0:
|
||||
|
||||
orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
|
||||
msr_s SYS_SMCR_EL2, x0 // length for EL1.
|
||||
|
||||
|
|
|
@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = {
|
|||
.name = "id_aa64smfr0",
|
||||
.override = &id_aa64smfr0_override,
|
||||
.fields = {
|
||||
FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
|
||||
/* FA64 is a one bit field... :-/ */
|
||||
{ "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
|
||||
{}
|
||||
|
|
|
@ -307,27 +307,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|||
|
||||
/*
|
||||
* In the unlikely event that we create a new thread with ZA
|
||||
* enabled we should retain the ZA state so duplicate it here.
|
||||
* This may be shortly freed if we exec() or if CLONE_SETTLS
|
||||
* but it's simpler to do it here. To avoid confusing the rest
|
||||
* of the code ensure that we have a sve_state allocated
|
||||
* whenever za_state is allocated.
|
||||
* enabled we should retain the ZA and ZT state so duplicate
|
||||
* it here. This may be shortly freed if we exec() or if
|
||||
* CLONE_SETTLS but it's simpler to do it here. To avoid
|
||||
* confusing the rest of the code ensure that we have a
|
||||
* sve_state allocated whenever sme_state is allocated.
|
||||
*/
|
||||
if (thread_za_enabled(&src->thread)) {
|
||||
dst->thread.sve_state = kzalloc(sve_state_size(src),
|
||||
GFP_KERNEL);
|
||||
if (!dst->thread.sve_state)
|
||||
return -ENOMEM;
|
||||
dst->thread.za_state = kmemdup(src->thread.za_state,
|
||||
za_state_size(src),
|
||||
GFP_KERNEL);
|
||||
if (!dst->thread.za_state) {
|
||||
|
||||
dst->thread.sme_state = kmemdup(src->thread.sme_state,
|
||||
sme_state_size(src),
|
||||
GFP_KERNEL);
|
||||
if (!dst->thread.sme_state) {
|
||||
kfree(dst->thread.sve_state);
|
||||
dst->thread.sve_state = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
} else {
|
||||
dst->thread.za_state = NULL;
|
||||
dst->thread.sme_state = NULL;
|
||||
clear_tsk_thread_flag(dst, TIF_SME);
|
||||
}
|
||||
|
||||
|
|
|
@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target,
|
|||
if (thread_za_enabled(&target->thread)) {
|
||||
start = end;
|
||||
end = ZA_PT_SIZE(vq);
|
||||
membuf_write(&to, target->thread.za_state, end - start);
|
||||
membuf_write(&to, target->thread.sme_state, end - start);
|
||||
}
|
||||
|
||||
/* Zero any trailing padding */
|
||||
|
@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target,
|
|||
|
||||
/* Allocate/reinit ZA storage */
|
||||
sme_alloc(target);
|
||||
if (!target->thread.za_state) {
|
||||
if (!target->thread.sme_state) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target,
|
|||
start = ZA_PT_ZA_OFFSET;
|
||||
end = ZA_PT_SIZE(vq);
|
||||
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
|
||||
target->thread.za_state,
|
||||
target->thread.sme_state,
|
||||
start, end);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -1138,6 +1138,51 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int zt_get(struct task_struct *target,
|
||||
const struct user_regset *regset,
|
||||
struct membuf to)
|
||||
{
|
||||
if (!system_supports_sme2())
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If PSTATE.ZA is not set then ZT will be zeroed when it is
|
||||
* enabled so report the current register value as zero.
|
||||
*/
|
||||
if (thread_za_enabled(&target->thread))
|
||||
membuf_write(&to, thread_zt_state(&target->thread),
|
||||
ZT_SIG_REG_BYTES);
|
||||
else
|
||||
membuf_zero(&to, ZT_SIG_REG_BYTES);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int zt_set(struct task_struct *target,
|
||||
const struct user_regset *regset,
|
||||
unsigned int pos, unsigned int count,
|
||||
const void *kbuf, const void __user *ubuf)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!system_supports_sme2())
|
||||
return -EINVAL;
|
||||
|
||||
if (!thread_za_enabled(&target->thread)) {
|
||||
sme_alloc(target);
|
||||
if (!target->thread.sme_state)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
|
||||
thread_zt_state(&target->thread),
|
||||
0, ZT_SIG_REG_BYTES);
|
||||
if (ret == 0)
|
||||
target->thread.svcr |= SVCR_ZA_MASK;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_ARM64_SME */
|
||||
|
||||
#ifdef CONFIG_ARM64_PTR_AUTH
|
||||
|
@ -1360,6 +1405,7 @@ enum aarch64_regset {
|
|||
#ifdef CONFIG_ARM64_SME
|
||||
REGSET_SSVE,
|
||||
REGSET_ZA,
|
||||
REGSET_ZT,
|
||||
#endif
|
||||
#ifdef CONFIG_ARM64_PTR_AUTH
|
||||
REGSET_PAC_MASK,
|
||||
|
@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = {
|
|||
.regset_get = za_get,
|
||||
.set = za_set,
|
||||
},
|
||||
[REGSET_ZT] = { /* SME ZT */
|
||||
.core_note_type = NT_ARM_ZT,
|
||||
.n = 1,
|
||||
.size = ZT_SIG_REG_BYTES,
|
||||
.align = sizeof(u64),
|
||||
.regset_get = zt_get,
|
||||
.set = zt_set,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_ARM64_PTR_AUTH
|
||||
[REGSET_PAC_MASK] = {
|
||||
|
|
|
@ -57,6 +57,7 @@ struct rt_sigframe_user_layout {
|
|||
unsigned long esr_offset;
|
||||
unsigned long sve_offset;
|
||||
unsigned long za_offset;
|
||||
unsigned long zt_offset;
|
||||
unsigned long extra_offset;
|
||||
unsigned long end_offset;
|
||||
};
|
||||
|
@ -221,6 +222,7 @@ struct user_ctxs {
|
|||
struct fpsimd_context __user *fpsimd;
|
||||
struct sve_context __user *sve;
|
||||
struct za_context __user *za;
|
||||
struct zt_context __user *zt;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_ARM64_SVE
|
||||
|
@ -394,7 +396,7 @@ static int preserve_za_context(struct za_context __user *ctx)
|
|||
* fpsimd_signal_preserve_current_state().
|
||||
*/
|
||||
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
|
||||
current->thread.za_state,
|
||||
current->thread.sme_state,
|
||||
ZA_SIG_REGS_SIZE(vq));
|
||||
}
|
||||
|
||||
|
@ -425,7 +427,7 @@ static int restore_za_context(struct user_ctxs *user)
|
|||
|
||||
/*
|
||||
* Careful: we are about __copy_from_user() directly into
|
||||
* thread.za_state with preemption enabled, so protection is
|
||||
* thread.sme_state with preemption enabled, so protection is
|
||||
* needed to prevent a racing context switch from writing stale
|
||||
* registers back over the new data.
|
||||
*/
|
||||
|
@ -434,13 +436,13 @@ static int restore_za_context(struct user_ctxs *user)
|
|||
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
|
||||
|
||||
sme_alloc(current);
|
||||
if (!current->thread.za_state) {
|
||||
if (!current->thread.sme_state) {
|
||||
current->thread.svcr &= ~SVCR_ZA_MASK;
|
||||
clear_thread_flag(TIF_SME);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
err = __copy_from_user(current->thread.za_state,
|
||||
err = __copy_from_user(current->thread.sme_state,
|
||||
(char __user const *)user->za +
|
||||
ZA_SIG_REGS_OFFSET,
|
||||
ZA_SIG_REGS_SIZE(vq));
|
||||
|
@ -452,11 +454,81 @@ static int restore_za_context(struct user_ctxs *user)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int preserve_zt_context(struct zt_context __user *ctx)
|
||||
{
|
||||
int err = 0;
|
||||
u16 reserved[ARRAY_SIZE(ctx->__reserved)];
|
||||
|
||||
if (WARN_ON(!thread_za_enabled(¤t->thread)))
|
||||
return -EINVAL;
|
||||
|
||||
memset(reserved, 0, sizeof(reserved));
|
||||
|
||||
__put_user_error(ZT_MAGIC, &ctx->head.magic, err);
|
||||
__put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
|
||||
&ctx->head.size, err);
|
||||
__put_user_error(1, &ctx->nregs, err);
|
||||
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
|
||||
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
|
||||
|
||||
/*
|
||||
* This assumes that the ZT state has already been saved to
|
||||
* the task struct by calling the function
|
||||
* fpsimd_signal_preserve_current_state().
|
||||
*/
|
||||
err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
|
||||
thread_zt_state(¤t->thread),
|
||||
ZT_SIG_REGS_SIZE(1));
|
||||
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int restore_zt_context(struct user_ctxs *user)
|
||||
{
|
||||
int err;
|
||||
struct zt_context zt;
|
||||
|
||||
/* ZA must be restored first for this check to be valid */
|
||||
if (!thread_za_enabled(¤t->thread))
|
||||
return -EINVAL;
|
||||
|
||||
if (__copy_from_user(&zt, user->zt, sizeof(zt)))
|
||||
return -EFAULT;
|
||||
|
||||
if (zt.nregs != 1)
|
||||
return -EINVAL;
|
||||
|
||||
if (zt.head.size != ZT_SIG_CONTEXT_SIZE(zt.nregs))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Careful: we are about __copy_from_user() directly into
|
||||
* thread.zt_state with preemption enabled, so protection is
|
||||
* needed to prevent a racing context switch from writing stale
|
||||
* registers back over the new data.
|
||||
*/
|
||||
|
||||
fpsimd_flush_task_state(current);
|
||||
/* From now, fpsimd_thread_switch() won't touch ZT in thread state */
|
||||
|
||||
err = __copy_from_user(thread_zt_state(¤t->thread),
|
||||
(char __user const *)user->zt +
|
||||
ZT_SIG_REGS_OFFSET,
|
||||
ZT_SIG_REGS_SIZE(1));
|
||||
if (err)
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* ! CONFIG_ARM64_SME */
|
||||
|
||||
/* Turn any non-optimised out attempts to use these into a link error: */
|
||||
extern int preserve_za_context(void __user *ctx);
|
||||
extern int restore_za_context(struct user_ctxs *user);
|
||||
extern int preserve_zt_context(void __user *ctx);
|
||||
extern int restore_zt_context(struct user_ctxs *user);
|
||||
|
||||
#endif /* ! CONFIG_ARM64_SME */
|
||||
|
||||
|
@ -474,6 +546,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
|
|||
user->fpsimd = NULL;
|
||||
user->sve = NULL;
|
||||
user->za = NULL;
|
||||
user->zt = NULL;
|
||||
|
||||
if (!IS_ALIGNED((unsigned long)base, 16))
|
||||
goto invalid;
|
||||
|
@ -552,6 +625,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
|
|||
user->za = (struct za_context __user *)head;
|
||||
break;
|
||||
|
||||
case ZT_MAGIC:
|
||||
if (!system_supports_sme2())
|
||||
goto invalid;
|
||||
|
||||
if (user->zt)
|
||||
goto invalid;
|
||||
|
||||
if (size < sizeof(*user->zt))
|
||||
goto invalid;
|
||||
|
||||
user->zt = (struct zt_context __user *)head;
|
||||
break;
|
||||
|
||||
case EXTRA_MAGIC:
|
||||
if (have_extra_context)
|
||||
goto invalid;
|
||||
|
@ -674,6 +760,9 @@ static int restore_sigframe(struct pt_regs *regs,
|
|||
if (err == 0 && system_supports_sme() && user.za)
|
||||
err = restore_za_context(&user);
|
||||
|
||||
if (err == 0 && system_supports_sme2() && user.zt)
|
||||
err = restore_zt_context(&user);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -774,6 +863,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
|
|||
return err;
|
||||
}
|
||||
|
||||
if (system_supports_sme2()) {
|
||||
if (add_all || thread_za_enabled(¤t->thread)) {
|
||||
err = sigframe_alloc(user, &user->zt_offset,
|
||||
ZT_SIG_CONTEXT_SIZE(1));
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return sigframe_alloc_end(user);
|
||||
}
|
||||
|
||||
|
@ -829,6 +927,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
|
|||
err |= preserve_za_context(za_ctx);
|
||||
}
|
||||
|
||||
/* ZT state if present */
|
||||
if (system_supports_sme2() && err == 0 && user->zt_offset) {
|
||||
struct zt_context __user *zt_ctx =
|
||||
apply_user_offset(user, user->zt_offset);
|
||||
err |= preserve_zt_context(zt_ctx);
|
||||
}
|
||||
|
||||
if (err == 0 && user->extra_offset) {
|
||||
char __user *sfp = (char __user *)user->sigframe;
|
||||
char __user *userp =
|
||||
|
|
|
@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
|
|||
fp_state.st = &vcpu->arch.ctxt.fp_regs;
|
||||
fp_state.sve_state = vcpu->arch.sve_state;
|
||||
fp_state.sve_vl = vcpu->arch.sve_max_vl;
|
||||
fp_state.za_state = NULL;
|
||||
fp_state.sme_state = NULL;
|
||||
fp_state.svcr = &vcpu->arch.svcr;
|
||||
fp_state.fp_type = &vcpu->arch.fp_type;
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ MTE
|
|||
MTE_ASYMM
|
||||
SME
|
||||
SME_FA64
|
||||
SME2
|
||||
SPECTRE_V2
|
||||
SPECTRE_V3A
|
||||
SPECTRE_V4
|
||||
|
|
|
@ -894,6 +894,7 @@ EndEnum
|
|||
Enum 27:24 SME
|
||||
0b0000 NI
|
||||
0b0001 IMP
|
||||
0b0010 SME2
|
||||
EndEnum
|
||||
Res0 23:20
|
||||
Enum 19:16 MPAM_frac
|
||||
|
@ -975,7 +976,9 @@ Enum 63 FA64
|
|||
EndEnum
|
||||
Res0 62:60
|
||||
Enum 59:56 SMEver
|
||||
0b0000 IMP
|
||||
0b0000 SME
|
||||
0b0001 SME2
|
||||
0b0010 SME2p1
|
||||
EndEnum
|
||||
Enum 55:52 I16I64
|
||||
0b0000 NI
|
||||
|
@ -986,7 +989,19 @@ Enum 48 F64F64
|
|||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 47:40
|
||||
Enum 47:44 I16I32
|
||||
0b0000 NI
|
||||
0b0101 IMP
|
||||
EndEnum
|
||||
Enum 43 B16B16
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Enum 42 F16F16
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 41:40
|
||||
Enum 39:36 I8I32
|
||||
0b0000 NI
|
||||
0b1111 IMP
|
||||
|
@ -999,7 +1014,10 @@ Enum 34 B16F32
|
|||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Res0 33
|
||||
Enum 33 BI32I32
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
EndEnum
|
||||
Enum 32 F32F32
|
||||
0b0 NI
|
||||
0b1 IMP
|
||||
|
@ -1599,7 +1617,8 @@ EndSysreg
|
|||
SysregFields SMCR_ELx
|
||||
Res0 63:32
|
||||
Field 31 FA64
|
||||
Res0 30:9
|
||||
Field 30 EZT0
|
||||
Res0 29:9
|
||||
Raz 8:4
|
||||
Field 3:0 LEN
|
||||
EndSysregFields
|
||||
|
|
|
@ -434,6 +434,7 @@ typedef struct elf64_shdr {
|
|||
#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */
|
||||
#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
|
||||
#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
|
||||
#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
|
||||
#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
|
||||
#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
|
||||
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
|
||||
|
|
|
@ -50,6 +50,78 @@ static void sme_sigill(void)
|
|||
asm volatile(".inst 0x04bf5800" : : : "x0");
|
||||
}
|
||||
|
||||
static void sme2_sigill(void)
|
||||
{
|
||||
/* SMSTART ZA */
|
||||
asm volatile("msr S0_3_C4_C5_3, xzr" : : : );
|
||||
|
||||
/* ZERO ZT0 */
|
||||
asm volatile(".inst 0xc0480001" : : : );
|
||||
|
||||
/* SMSTOP */
|
||||
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
|
||||
}
|
||||
|
||||
static void sme2p1_sigill(void)
|
||||
{
|
||||
/* SMSTART SM */
|
||||
asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
|
||||
|
||||
/* BFCLAMP { Z0.H - Z1.H }, Z0.H, Z0.H */
|
||||
asm volatile(".inst 0xc120C000" : : : );
|
||||
|
||||
/* SMSTOP */
|
||||
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
|
||||
}
|
||||
|
||||
static void smei16i32_sigill(void)
|
||||
{
|
||||
/* SMSTART */
|
||||
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
|
||||
|
||||
/* SMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
|
||||
asm volatile(".inst 0xa0800000" : : : );
|
||||
|
||||
/* SMSTOP */
|
||||
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
|
||||
}
|
||||
|
||||
static void smebi32i32_sigill(void)
|
||||
{
|
||||
/* SMSTART */
|
||||
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
|
||||
|
||||
/* BMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
|
||||
asm volatile(".inst 0x80800008" : : : );
|
||||
|
||||
/* SMSTOP */
|
||||
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
|
||||
}
|
||||
|
||||
static void smeb16b16_sigill(void)
|
||||
{
|
||||
/* SMSTART */
|
||||
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
|
||||
|
||||
/* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
|
||||
asm volatile(".inst 0xC1E41C00" : : : );
|
||||
|
||||
/* SMSTOP */
|
||||
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
|
||||
}
|
||||
|
||||
static void smef16f16_sigill(void)
|
||||
{
|
||||
/* SMSTART */
|
||||
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
|
||||
|
||||
/* FADD ZA.H[W0, 0], { Z0.H-Z1.H } */
|
||||
asm volatile(".inst 0xc1a41C00" : : : );
|
||||
|
||||
/* SMSTOP */
|
||||
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
|
||||
}
|
||||
|
||||
static void sve_sigill(void)
|
||||
{
|
||||
/* RDVL x0, #0 */
|
||||
|
@ -158,6 +230,49 @@ static const struct hwcap_data {
|
|||
.sigill_fn = sme_sigill,
|
||||
.sigill_reliable = true,
|
||||
},
|
||||
{
|
||||
.name = "SME2",
|
||||
.at_hwcap = AT_HWCAP2,
|
||||
.hwcap_bit = HWCAP2_SME2,
|
||||
.cpuinfo = "sme2",
|
||||
.sigill_fn = sme2_sigill,
|
||||
.sigill_reliable = true,
|
||||
},
|
||||
{
|
||||
.name = "SME 2.1",
|
||||
.at_hwcap = AT_HWCAP2,
|
||||
.hwcap_bit = HWCAP2_SME2P1,
|
||||
.cpuinfo = "sme2p1",
|
||||
.sigill_fn = sme2p1_sigill,
|
||||
},
|
||||
{
|
||||
.name = "SME I16I32",
|
||||
.at_hwcap = AT_HWCAP2,
|
||||
.hwcap_bit = HWCAP2_SME_I16I32,
|
||||
.cpuinfo = "smei16i32",
|
||||
.sigill_fn = smei16i32_sigill,
|
||||
},
|
||||
{
|
||||
.name = "SME BI32I32",
|
||||
.at_hwcap = AT_HWCAP2,
|
||||
.hwcap_bit = HWCAP2_SME_BI32I32,
|
||||
.cpuinfo = "smebi32i32",
|
||||
.sigill_fn = smebi32i32_sigill,
|
||||
},
|
||||
{
|
||||
.name = "SME B16B16",
|
||||
.at_hwcap = AT_HWCAP2,
|
||||
.hwcap_bit = HWCAP2_SME_B16B16,
|
||||
.cpuinfo = "smeb16b16",
|
||||
.sigill_fn = smeb16b16_sigill,
|
||||
},
|
||||
{
|
||||
.name = "SME F16F16",
|
||||
.at_hwcap = AT_HWCAP2,
|
||||
.hwcap_bit = HWCAP2_SME_F16F16,
|
||||
.cpuinfo = "smef16f16",
|
||||
.sigill_fn = smef16f16_sigill,
|
||||
},
|
||||
{
|
||||
.name = "SVE",
|
||||
.at_hwcap = AT_HWCAP,
|
||||
|
|
|
@ -23,6 +23,9 @@
|
|||
|
||||
.arch_extension sve
|
||||
|
||||
#define ID_AA64SMFR0_EL1_SMEver_SHIFT 56
|
||||
#define ID_AA64SMFR0_EL1_SMEver_WIDTH 4
|
||||
|
||||
/*
|
||||
* LDR (vector to ZA array):
|
||||
* LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
|
||||
|
@ -45,6 +48,26 @@
|
|||
| ((\offset) & 7)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* LDR (ZT0)
|
||||
*
|
||||
* LDR ZT0, nx
|
||||
*/
|
||||
.macro _ldr_zt nx
|
||||
.inst 0xe11f8000 \
|
||||
| (((\nx) & 0x1f) << 5)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* STR (ZT0)
|
||||
*
|
||||
* STR ZT0, nx
|
||||
*/
|
||||
.macro _str_zt nx
|
||||
.inst 0xe13f8000 \
|
||||
| (((\nx) & 0x1f) << 5)
|
||||
.endm
|
||||
|
||||
.globl do_syscall
|
||||
do_syscall:
|
||||
// Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
|
||||
|
@ -64,7 +87,7 @@ do_syscall:
|
|||
msr S3_3_C4_C2_2, x2
|
||||
1:
|
||||
|
||||
// Load ZA if it's enabled - uses x12 as scratch due to SME LDR
|
||||
// Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR
|
||||
tbz x2, #SVCR_ZA_SHIFT, 1f
|
||||
mov w12, #0
|
||||
ldr x2, =za_in
|
||||
|
@ -73,6 +96,15 @@ do_syscall:
|
|||
add x12, x12, #1
|
||||
cmp x1, x12
|
||||
bne 2b
|
||||
|
||||
// ZT0
|
||||
mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1
|
||||
ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
|
||||
#ID_AA64SMFR0_EL1_SMEver_WIDTH
|
||||
cbz x2, 1f
|
||||
adrp x2, zt_in
|
||||
add x2, x2, :lo12:zt_in
|
||||
_ldr_zt 2
|
||||
1:
|
||||
|
||||
// Load GPRs x8-x28, and save our SP/FP for later comparison
|
||||
|
@ -235,6 +267,15 @@ do_syscall:
|
|||
add x12, x12, #1
|
||||
cmp x1, x12
|
||||
bne 2b
|
||||
|
||||
// ZT0
|
||||
mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1
|
||||
ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
|
||||
#ID_AA64SMFR0_EL1_SMEver_WIDTH
|
||||
cbz x2, 1f
|
||||
adrp x2, zt_out
|
||||
add x2, x2, :lo12:zt_out
|
||||
_str_zt 2
|
||||
1:
|
||||
|
||||
// Save the SVE state if we have some
|
||||
|
|
|
@ -311,6 +311,35 @@ static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
|
|||
return errors;
|
||||
}
|
||||
|
||||
uint8_t zt_in[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
|
||||
uint8_t zt_out[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
|
||||
|
||||
static void setup_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
|
||||
uint64_t svcr)
|
||||
{
|
||||
fill_random(zt_in, sizeof(zt_in));
|
||||
memset(zt_out, 0, sizeof(zt_out));
|
||||
}
|
||||
|
||||
static int check_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
|
||||
uint64_t svcr)
|
||||
{
|
||||
int errors = 0;
|
||||
|
||||
if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2))
|
||||
return 0;
|
||||
|
||||
if (!(svcr & SVCR_ZA_MASK))
|
||||
return 0;
|
||||
|
||||
if (memcmp(zt_in, zt_out, sizeof(zt_in)) != 0) {
|
||||
ksft_print_msg("SME VL %d ZT does not match\n", sme_vl);
|
||||
errors++;
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
|
||||
uint64_t svcr);
|
||||
typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
|
||||
|
@ -334,6 +363,7 @@ static struct {
|
|||
{ setup_ffr, check_ffr },
|
||||
{ setup_svcr, check_svcr },
|
||||
{ setup_za, check_za },
|
||||
{ setup_zt, check_zt },
|
||||
};
|
||||
|
||||
static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
|
||||
|
@ -474,6 +504,7 @@ int main(void)
|
|||
{
|
||||
int i;
|
||||
int tests = 1; /* FPSIMD */
|
||||
int sme_ver;
|
||||
|
||||
srandom(getpid());
|
||||
|
||||
|
@ -482,10 +513,15 @@ int main(void)
|
|||
tests += (sve_count_vls() * sme_count_vls()) * 3;
|
||||
ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
|
||||
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
|
||||
sme_ver = 2;
|
||||
else
|
||||
sme_ver = 1;
|
||||
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
|
||||
ksft_print_msg("SME with FA64\n");
|
||||
ksft_print_msg("SME%d with FA64\n", sme_ver);
|
||||
else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
|
||||
ksft_print_msg("SME without FA64\n");
|
||||
ksft_print_msg("SME%d without FA64\n", sme_ver);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(syscalls); i++)
|
||||
test_one_syscall(&syscalls[i]);
|
||||
|
|
|
@ -12,3 +12,5 @@ vlset
|
|||
za-fork
|
||||
za-ptrace
|
||||
za-test
|
||||
zt-ptrace
|
||||
zt-test
|
||||
|
|
|
@ -14,6 +14,8 @@ TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
|
|||
sve-test \
|
||||
ssve-test \
|
||||
za-test \
|
||||
zt-ptrace \
|
||||
zt-test \
|
||||
vlset
|
||||
TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
|
||||
|
||||
|
@ -41,5 +43,8 @@ $(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
|
|||
$(OUTPUT)/za-ptrace: za-ptrace.c
|
||||
$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
|
||||
$(CC) -nostdlib $^ -o $@
|
||||
$(OUTPUT)/zt-ptrace: zt-ptrace.c
|
||||
$(OUTPUT)/zt-test: zt-test.S $(OUTPUT)/asm-utils.o
|
||||
$(CC) -nostdlib $^ -o $@
|
||||
|
||||
include ../../lib.mk
|
||||
|
|
|
@ -370,6 +370,19 @@ static void start_za(struct child_data *child, int vl, int cpu)
|
|||
ksft_print_msg("Started %s\n", child->name);
|
||||
}
|
||||
|
||||
static void start_zt(struct child_data *child, int cpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = asprintf(&child->name, "ZT-%d", cpu);
|
||||
if (ret == -1)
|
||||
ksft_exit_fail_msg("asprintf() failed\n");
|
||||
|
||||
child_start(child, "./zt-test");
|
||||
|
||||
ksft_print_msg("Started %s\n", child->name);
|
||||
}
|
||||
|
||||
static void probe_vls(int vls[], int *vl_count, int set_vl)
|
||||
{
|
||||
unsigned int vq;
|
||||
|
@ -426,6 +439,7 @@ int main(int argc, char **argv)
|
|||
bool all_children_started = false;
|
||||
int seen_children;
|
||||
int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
|
||||
bool have_sme2;
|
||||
struct sigaction sa;
|
||||
|
||||
while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
|
||||
|
@ -458,6 +472,13 @@ int main(int argc, char **argv)
|
|||
sme_vl_count = 0;
|
||||
}
|
||||
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SME2) {
|
||||
tests += cpus;
|
||||
have_sme2 = true;
|
||||
} else {
|
||||
have_sme2 = false;
|
||||
}
|
||||
|
||||
/* Force context switching if we only have FPSIMD */
|
||||
if (!sve_vl_count && !sme_vl_count)
|
||||
fpsimd_per_cpu = 2;
|
||||
|
@ -468,8 +489,9 @@ int main(int argc, char **argv)
|
|||
ksft_print_header();
|
||||
ksft_set_plan(tests);
|
||||
|
||||
ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n",
|
||||
cpus, sve_vl_count, sme_vl_count);
|
||||
ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs, SME2 %s\n",
|
||||
cpus, sve_vl_count, sme_vl_count,
|
||||
have_sme2 ? "present" : "absent");
|
||||
|
||||
if (timeout > 0)
|
||||
ksft_print_msg("Will run for %ds\n", timeout);
|
||||
|
@ -527,6 +549,9 @@ int main(int argc, char **argv)
|
|||
start_ssve(&children[num_children++], sme_vls[j], i);
|
||||
start_za(&children[num_children++], sme_vls[j], i);
|
||||
}
|
||||
|
||||
if (have_sme2)
|
||||
start_zt(&children[num_children++], i);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -48,4 +48,24 @@
|
|||
| ((\offset) & 7)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* LDR (ZT0)
|
||||
*
|
||||
* LDR ZT0, nx
|
||||
*/
|
||||
.macro _ldr_zt nx
|
||||
.inst 0xe11f8000 \
|
||||
| (((\nx) & 0x1f) << 5)
|
||||
.endm
|
||||
|
||||
/*
|
||||
* STR (ZT0)
|
||||
*
|
||||
* STR ZT0, nx
|
||||
*/
|
||||
.macro _str_zt nx
|
||||
.inst 0xe13f8000 \
|
||||
| (((\nx) & 0x1f) << 5)
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,365 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2021 ARM Limited.
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/wait.h>
|
||||
#include <asm/sigcontext.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
#include "../../kselftest.h"
|
||||
|
||||
/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
|
||||
#ifndef NT_ARM_ZA
|
||||
#define NT_ARM_ZA 0x40c
|
||||
#endif
|
||||
#ifndef NT_ARM_ZT
|
||||
#define NT_ARM_ZT 0x40d
|
||||
#endif
|
||||
|
||||
#define EXPECTED_TESTS 3
|
||||
|
||||
static int sme_vl;
|
||||
|
||||
static void fill_buf(char *buf, size_t size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
buf[i] = random();
|
||||
}
|
||||
|
||||
static int do_child(void)
|
||||
{
|
||||
if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
|
||||
ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
|
||||
|
||||
if (raise(SIGSTOP))
|
||||
ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
|
||||
{
|
||||
struct user_za_header *za;
|
||||
void *p;
|
||||
size_t sz = sizeof(*za);
|
||||
struct iovec iov;
|
||||
|
||||
while (1) {
|
||||
if (*size < sz) {
|
||||
p = realloc(*buf, sz);
|
||||
if (!p) {
|
||||
errno = ENOMEM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
*buf = p;
|
||||
*size = sz;
|
||||
}
|
||||
|
||||
iov.iov_base = *buf;
|
||||
iov.iov_len = sz;
|
||||
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
|
||||
goto error;
|
||||
|
||||
za = *buf;
|
||||
if (za->size <= sz)
|
||||
break;
|
||||
|
||||
sz = za->size;
|
||||
}
|
||||
|
||||
return za;
|
||||
|
||||
error:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int set_za(pid_t pid, const struct user_za_header *za)
|
||||
{
|
||||
struct iovec iov;
|
||||
|
||||
iov.iov_base = (void *)za;
|
||||
iov.iov_len = za->size;
|
||||
return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
|
||||
}
|
||||
|
||||
static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
|
||||
{
|
||||
struct iovec iov;
|
||||
|
||||
iov.iov_base = zt;
|
||||
iov.iov_len = ZT_SIG_REG_BYTES;
|
||||
return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
|
||||
}
|
||||
|
||||
|
||||
static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
|
||||
{
|
||||
struct iovec iov;
|
||||
|
||||
iov.iov_base = (void *)zt;
|
||||
iov.iov_len = ZT_SIG_REG_BYTES;
|
||||
return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZT, &iov);
|
||||
}
|
||||
|
||||
/* Reading with ZA disabled returns all zeros */
|
||||
static void ptrace_za_disabled_read_zt(pid_t child)
|
||||
{
|
||||
struct user_za_header za;
|
||||
char zt[ZT_SIG_REG_BYTES];
|
||||
int ret, i;
|
||||
bool fail = false;
|
||||
|
||||
/* Disable PSTATE.ZA using the ZA interface */
|
||||
memset(&za, 0, sizeof(za));
|
||||
za.vl = sme_vl;
|
||||
za.size = sizeof(za);
|
||||
|
||||
ret = set_za(child, &za);
|
||||
if (ret != 0) {
|
||||
ksft_print_msg("Failed to disable ZA\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
/* Read back ZT */
|
||||
ret = get_zt(child, zt);
|
||||
if (ret != 0) {
|
||||
ksft_print_msg("Failed to read ZT\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(zt); i++) {
|
||||
if (zt[i]) {
|
||||
ksft_print_msg("zt[%d]: 0x%x != 0\n", i, zt[i]);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
ksft_test_result(!fail, "ptrace_za_disabled_read_zt\n");
|
||||
}
|
||||
|
||||
/* Writing then reading ZT should return the data written */
|
||||
static void ptrace_set_get_zt(pid_t child)
|
||||
{
|
||||
char zt_in[ZT_SIG_REG_BYTES];
|
||||
char zt_out[ZT_SIG_REG_BYTES];
|
||||
int ret, i;
|
||||
bool fail = false;
|
||||
|
||||
fill_buf(zt_in, sizeof(zt_in));
|
||||
|
||||
ret = set_zt(child, zt_in);
|
||||
if (ret != 0) {
|
||||
ksft_print_msg("Failed to set ZT\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
ret = get_zt(child, zt_out);
|
||||
if (ret != 0) {
|
||||
ksft_print_msg("Failed to read ZT\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(zt_in); i++) {
|
||||
if (zt_in[i] != zt_out[i]) {
|
||||
ksft_print_msg("zt[%d]: 0x%x != 0x%x\n", i,
|
||||
zt_in[i], zt_out[i]);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
ksft_test_result(!fail, "ptrace_set_get_zt\n");
|
||||
}
|
||||
|
||||
/* Writing ZT should set PSTATE.ZA */
|
||||
static void ptrace_enable_za_via_zt(pid_t child)
|
||||
{
|
||||
struct user_za_header za_in;
|
||||
struct user_za_header *za_out;
|
||||
char zt[ZT_SIG_REG_BYTES];
|
||||
char *za_data;
|
||||
size_t za_out_size;
|
||||
int ret, i, vq;
|
||||
bool fail = false;
|
||||
|
||||
/* Disable PSTATE.ZA using the ZA interface */
|
||||
memset(&za_in, 0, sizeof(za_in));
|
||||
za_in.vl = sme_vl;
|
||||
za_in.size = sizeof(za_in);
|
||||
|
||||
ret = set_za(child, &za_in);
|
||||
if (ret != 0) {
|
||||
ksft_print_msg("Failed to disable ZA\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
/* Write ZT */
|
||||
fill_buf(zt, sizeof(zt));
|
||||
ret = set_zt(child, zt);
|
||||
if (ret != 0) {
|
||||
ksft_print_msg("Failed to set ZT\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
/* Read back ZA and check for register data */
|
||||
za_out = NULL;
|
||||
za_out_size = 0;
|
||||
if (get_za(child, (void **)&za_out, &za_out_size)) {
|
||||
/* Should have an unchanged VL */
|
||||
if (za_out->vl != sme_vl) {
|
||||
ksft_print_msg("VL changed from %d to %d\n",
|
||||
sme_vl, za_out->vl);
|
||||
fail = true;
|
||||
}
|
||||
vq = __sve_vq_from_vl(za_out->vl);
|
||||
za_data = (char *)za_out + ZA_PT_ZA_OFFSET;
|
||||
|
||||
/* Should have register data */
|
||||
if (za_out->size < ZA_PT_SIZE(vq)) {
|
||||
ksft_print_msg("ZA data less than expected: %u < %u\n",
|
||||
za_out->size, ZA_PT_SIZE(vq));
|
||||
fail = true;
|
||||
vq = 0;
|
||||
}
|
||||
|
||||
/* That register data should be non-zero */
|
||||
for (i = 0; i < ZA_PT_ZA_SIZE(vq); i++) {
|
||||
if (za_data[i]) {
|
||||
ksft_print_msg("ZA byte %d is %x\n",
|
||||
i, za_data[i]);
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ksft_print_msg("Failed to read ZA\n");
|
||||
fail = true;
|
||||
}
|
||||
|
||||
ksft_test_result(!fail, "ptrace_enable_za_via_zt\n");
|
||||
}
|
||||
|
||||
static int do_parent(pid_t child)
|
||||
{
|
||||
int ret = EXIT_FAILURE;
|
||||
pid_t pid;
|
||||
int status;
|
||||
siginfo_t si;
|
||||
|
||||
/* Attach to the child */
|
||||
while (1) {
|
||||
int sig;
|
||||
|
||||
pid = wait(&status);
|
||||
if (pid == -1) {
|
||||
perror("wait");
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* This should never happen but it's hard to flag in
|
||||
* the framework.
|
||||
*/
|
||||
if (pid != child)
|
||||
continue;
|
||||
|
||||
if (WIFEXITED(status) || WIFSIGNALED(status))
|
||||
ksft_exit_fail_msg("Child died unexpectedly\n");
|
||||
|
||||
if (!WIFSTOPPED(status))
|
||||
goto error;
|
||||
|
||||
sig = WSTOPSIG(status);
|
||||
|
||||
if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
|
||||
if (errno == ESRCH)
|
||||
goto disappeared;
|
||||
|
||||
if (errno == EINVAL) {
|
||||
sig = 0; /* bust group-stop */
|
||||
goto cont;
|
||||
}
|
||||
|
||||
ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
|
||||
strerror(errno));
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (sig == SIGSTOP && si.si_code == SI_TKILL &&
|
||||
si.si_pid == pid)
|
||||
break;
|
||||
|
||||
cont:
|
||||
if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
|
||||
if (errno == ESRCH)
|
||||
goto disappeared;
|
||||
|
||||
ksft_test_result_fail("PTRACE_CONT: %s\n",
|
||||
strerror(errno));
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
|
||||
|
||||
ptrace_za_disabled_read_zt(child);
|
||||
ptrace_set_get_zt(child);
|
||||
ptrace_enable_za_via_zt(child);
|
||||
|
||||
ret = EXIT_SUCCESS;
|
||||
|
||||
error:
|
||||
kill(child, SIGKILL);
|
||||
|
||||
disappeared:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int ret = EXIT_SUCCESS;
|
||||
pid_t child;
|
||||
|
||||
srandom(getpid());
|
||||
|
||||
ksft_print_header();
|
||||
|
||||
if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) {
|
||||
ksft_set_plan(1);
|
||||
ksft_exit_skip("SME2 not available\n");
|
||||
}
|
||||
|
||||
/* We need a valid SME VL to enable/disable ZA */
|
||||
sme_vl = prctl(PR_SME_GET_VL);
|
||||
if (sme_vl == -1) {
|
||||
ksft_set_plan(1);
|
||||
ksft_exit_skip("Failed to read SME VL: %d (%s)\n",
|
||||
errno, strerror(errno));
|
||||
}
|
||||
|
||||
ksft_set_plan(EXPECTED_TESTS);
|
||||
|
||||
child = fork();
|
||||
if (!child)
|
||||
return do_child();
|
||||
|
||||
if (do_parent(child))
|
||||
ret = EXIT_FAILURE;
|
||||
|
||||
ksft_print_cnts();
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,316 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
// Copyright (C) 2021-2 ARM Limited.
|
||||
// Original author: Mark Brown <broonie@kernel.org>
|
||||
//
|
||||
// Scalable Matrix Extension ZT context switch test
|
||||
// Repeatedly writes unique test patterns into ZT0
|
||||
// and reads them back to verify integrity.
|
||||
|
||||
#include <asm/unistd.h>
|
||||
#include "assembler.h"
|
||||
#include "asm-offsets.h"
|
||||
#include "sme-inst.h"
|
||||
|
||||
.arch_extension sve
|
||||
|
||||
#define ZT_SZ 512
|
||||
#define ZT_B (ZT_SZ / 8)
|
||||
|
||||
// Declare some storage space to shadow ZT register contents and a
|
||||
// scratch buffer.
|
||||
.pushsection .text
|
||||
.data
|
||||
.align 4
|
||||
ztref:
|
||||
.space ZT_B
|
||||
scratch:
|
||||
.space ZT_B
|
||||
.popsection
|
||||
|
||||
|
||||
// Generate a test pattern for storage in ZT
|
||||
// x0: pid
|
||||
// x1: generation
|
||||
|
||||
// These values are used to construct a 32-bit pattern that is repeated in the
|
||||
// scratch buffer as many times as will fit:
|
||||
// bits 31:24 generation number (increments once per test_loop)
|
||||
// bits 23: 8 pid
|
||||
// bits 7: 0 32-bit lane index
|
||||
|
||||
function pattern
|
||||
mov w3, wzr
|
||||
bfi w3, w0, #8, #16 // PID
|
||||
bfi w3, w1, #24, #8 // Generation
|
||||
|
||||
ldr x0, =scratch
|
||||
mov w1, #ZT_B / 4
|
||||
|
||||
0: str w3, [x0], #4
|
||||
add w3, w3, #1 // Lane
|
||||
subs w1, w1, #1
|
||||
b.ne 0b
|
||||
|
||||
ret
|
||||
endfunction
|
||||
|
||||
// Set up test pattern in a ZT horizontal vector
|
||||
// x0: pid
|
||||
// x1: generation
|
||||
function setup_zt
|
||||
mov x4, x30
|
||||
|
||||
bl pattern // Get pattern in scratch buffer
|
||||
ldr x0, =ztref
|
||||
ldr x1, =scratch
|
||||
mov x2, #ZT_B
|
||||
bl memcpy
|
||||
|
||||
ldr x0, =ztref
|
||||
_ldr_zt 0 // load zt0 from pointer x0
|
||||
|
||||
ret x4
|
||||
endfunction
|
||||
|
||||
// Trivial memory compare: compare x2 bytes starting at address x0 with
|
||||
// bytes starting at address x1.
|
||||
// Returns only if all bytes match; otherwise, the program is aborted.
|
||||
// Clobbers x0-x5.
|
||||
function memcmp
|
||||
cbz x2, 2f
|
||||
|
||||
stp x0, x1, [sp, #-0x20]!
|
||||
str x2, [sp, #0x10]
|
||||
|
||||
mov x5, #0
|
||||
0: ldrb w3, [x0, x5]
|
||||
ldrb w4, [x1, x5]
|
||||
add x5, x5, #1
|
||||
cmp w3, w4
|
||||
b.ne 1f
|
||||
subs x2, x2, #1
|
||||
b.ne 0b
|
||||
|
||||
1: ldr x2, [sp, #0x10]
|
||||
ldp x0, x1, [sp], #0x20
|
||||
b.ne barf
|
||||
|
||||
2: ret
|
||||
endfunction
|
||||
|
||||
// Verify that a ZT vector matches its shadow in memory, else abort
|
||||
// Clobbers x0-x3
|
||||
function check_zt
|
||||
mov x3, x30
|
||||
|
||||
ldr x0, =scratch // Poison scratch
|
||||
mov x1, #ZT_B
|
||||
bl memfill_ae
|
||||
|
||||
ldr x0, =scratch
|
||||
_str_zt 0
|
||||
|
||||
ldr x0, =ztref
|
||||
ldr x1, =scratch
|
||||
mov x2, #ZT_B
|
||||
mov x30, x3
|
||||
b memcmp
|
||||
endfunction
|
||||
|
||||
// Any SME register modified here can cause corruption in the main
|
||||
// thread -- but *only* the locations modified here.
|
||||
function irritator_handler
|
||||
// Increment the irritation signal count (x23):
|
||||
ldr x0, [x2, #ucontext_regs + 8 * 23]
|
||||
add x0, x0, #1
|
||||
str x0, [x2, #ucontext_regs + 8 * 23]
|
||||
|
||||
// Corrupt some random ZT data
|
||||
#if 0
|
||||
adr x0, .text + (irritator_handler - .text) / 16 * 16
|
||||
movi v0.8b, #1
|
||||
movi v9.16b, #2
|
||||
movi v31.8b, #3
|
||||
#endif
|
||||
|
||||
ret
|
||||
endfunction
|
||||
|
||||
function tickle_handler
|
||||
// Increment the signal count (x23):
|
||||
ldr x0, [x2, #ucontext_regs + 8 * 23]
|
||||
add x0, x0, #1
|
||||
str x0, [x2, #ucontext_regs + 8 * 23]
|
||||
|
||||
ret
|
||||
endfunction
|
||||
|
||||
function terminate_handler
|
||||
mov w21, w0
|
||||
mov x20, x2
|
||||
|
||||
puts "Terminated by signal "
|
||||
mov w0, w21
|
||||
bl putdec
|
||||
puts ", no error, iterations="
|
||||
ldr x0, [x20, #ucontext_regs + 8 * 22]
|
||||
bl putdec
|
||||
puts ", signals="
|
||||
ldr x0, [x20, #ucontext_regs + 8 * 23]
|
||||
bl putdecn
|
||||
|
||||
mov x0, #0
|
||||
mov x8, #__NR_exit
|
||||
svc #0
|
||||
endfunction
|
||||
|
||||
// w0: signal number
|
||||
// x1: sa_action
|
||||
// w2: sa_flags
|
||||
// Clobbers x0-x6,x8
|
||||
function setsignal
|
||||
str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
|
||||
|
||||
mov w4, w0
|
||||
mov x5, x1
|
||||
mov w6, w2
|
||||
|
||||
add x0, sp, #16
|
||||
mov x1, #sa_sz
|
||||
bl memclr
|
||||
|
||||
mov w0, w4
|
||||
add x1, sp, #16
|
||||
str w6, [x1, #sa_flags]
|
||||
str x5, [x1, #sa_handler]
|
||||
mov x2, #0
|
||||
mov x3, #sa_mask_sz
|
||||
mov x8, #__NR_rt_sigaction
|
||||
svc #0
|
||||
|
||||
cbz w0, 1f
|
||||
|
||||
puts "sigaction failure\n"
|
||||
b .Labort
|
||||
|
||||
1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
|
||||
ret
|
||||
endfunction
|
||||
|
||||
// Main program entry point
|
||||
.globl _start
|
||||
function _start
|
||||
mov x23, #0 // signal count
|
||||
|
||||
mov w0, #SIGINT
|
||||
adr x1, terminate_handler
|
||||
mov w2, #SA_SIGINFO
|
||||
bl setsignal
|
||||
|
||||
mov w0, #SIGTERM
|
||||
adr x1, terminate_handler
|
||||
mov w2, #SA_SIGINFO
|
||||
bl setsignal
|
||||
|
||||
mov w0, #SIGUSR1
|
||||
adr x1, irritator_handler
|
||||
mov w2, #SA_SIGINFO
|
||||
orr w2, w2, #SA_NODEFER
|
||||
bl setsignal
|
||||
|
||||
mov w0, #SIGUSR2
|
||||
adr x1, tickle_handler
|
||||
mov w2, #SA_SIGINFO
|
||||
orr w2, w2, #SA_NODEFER
|
||||
bl setsignal
|
||||
|
||||
smstart_za
|
||||
|
||||
// Obtain our PID, to ensure test pattern uniqueness between processes
|
||||
mov x8, #__NR_getpid
|
||||
svc #0
|
||||
mov x20, x0
|
||||
|
||||
puts "PID:\t"
|
||||
mov x0, x20
|
||||
bl putdecn
|
||||
|
||||
mov x22, #0 // generation number, increments per iteration
|
||||
.Ltest_loop:
|
||||
mov x0, x20
|
||||
mov x1, x22
|
||||
bl setup_zt
|
||||
|
||||
mov x8, #__NR_sched_yield // Encourage preemption
|
||||
svc #0
|
||||
|
||||
mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
|
||||
and x1, x0, #3
|
||||
cmp x1, #2
|
||||
b.ne svcr_barf
|
||||
|
||||
bl check_zt
|
||||
|
||||
add x22, x22, #1 // Everything still working
|
||||
b .Ltest_loop
|
||||
|
||||
.Labort:
|
||||
mov x0, #0
|
||||
mov x1, #SIGABRT
|
||||
mov x8, #__NR_kill
|
||||
svc #0
|
||||
endfunction
|
||||
|
||||
function barf
|
||||
// fpsimd.c acitivty log dump hack
|
||||
// ldr w0, =0xdeadc0de
|
||||
// mov w8, #__NR_exit
|
||||
// svc #0
|
||||
// end hack
|
||||
smstop
|
||||
mov x10, x0 // expected data
|
||||
mov x11, x1 // actual data
|
||||
mov x12, x2 // data size
|
||||
|
||||
puts "Mismatch: PID="
|
||||
mov x0, x20
|
||||
bl putdec
|
||||
puts ", iteration="
|
||||
mov x0, x22
|
||||
bl putdec
|
||||
puts "\tExpected ["
|
||||
mov x0, x10
|
||||
mov x1, x12
|
||||
bl dumphex
|
||||
puts "]\n\tGot ["
|
||||
mov x0, x11
|
||||
mov x1, x12
|
||||
bl dumphex
|
||||
puts "]\n"
|
||||
|
||||
mov x8, #__NR_getpid
|
||||
svc #0
|
||||
// fpsimd.c acitivty log dump hack
|
||||
// ldr w0, =0xdeadc0de
|
||||
// mov w8, #__NR_exit
|
||||
// svc #0
|
||||
// ^ end of hack
|
||||
mov x1, #SIGABRT
|
||||
mov x8, #__NR_kill
|
||||
svc #0
|
||||
// mov x8, #__NR_exit
|
||||
// mov x1, #1
|
||||
// svc #0
|
||||
endfunction
|
||||
|
||||
function svcr_barf
|
||||
mov x10, x0
|
||||
|
||||
puts "Bad SVCR: "
|
||||
mov x0, x10
|
||||
bl putdecn
|
||||
|
||||
mov x8, #__NR_exit
|
||||
mov x1, #1
|
||||
svc #0
|
||||
endfunction
|
|
@ -5,4 +5,5 @@ sme_*
|
|||
ssve_*
|
||||
sve_*
|
||||
za_*
|
||||
zt_*
|
||||
!*.[ch]
|
||||
|
|
|
@ -34,6 +34,7 @@ enum {
|
|||
FSVE_BIT,
|
||||
FSME_BIT,
|
||||
FSME_FA64_BIT,
|
||||
FSME2_BIT,
|
||||
FMAX_END
|
||||
};
|
||||
|
||||
|
@ -41,6 +42,7 @@ enum {
|
|||
#define FEAT_SVE (1UL << FSVE_BIT)
|
||||
#define FEAT_SME (1UL << FSME_BIT)
|
||||
#define FEAT_SME_FA64 (1UL << FSME_FA64_BIT)
|
||||
#define FEAT_SME2 (1UL << FSME2_BIT)
|
||||
|
||||
/*
|
||||
* A descriptor used to describe and configure a test case.
|
||||
|
|
|
@ -29,6 +29,7 @@ static char const *const feats_names[FMAX_END] = {
|
|||
" SVE ",
|
||||
" SME ",
|
||||
" FA64 ",
|
||||
" SME2 ",
|
||||
};
|
||||
|
||||
#define MAX_FEATS_SZ 128
|
||||
|
@ -323,6 +324,8 @@ int test_init(struct tdescr *td)
|
|||
td->feats_supported |= FEAT_SME;
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
|
||||
td->feats_supported |= FEAT_SME_FA64;
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
|
||||
td->feats_supported |= FEAT_SME2;
|
||||
if (feats_ok(td)) {
|
||||
if (td->feats_required & td->feats_supported)
|
||||
fprintf(stderr,
|
||||
|
|
|
@ -108,6 +108,26 @@ bool validate_za_context(struct za_context *za, char **err)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool validate_zt_context(struct zt_context *zt, char **err)
|
||||
{
|
||||
if (!zt || !err)
|
||||
return false;
|
||||
|
||||
/* If the context is present there should be at least one register */
|
||||
if (zt->nregs == 0) {
|
||||
*err = "no registers";
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Size should agree with the number of registers */
|
||||
if (zt->head.size != ZT_SIG_CONTEXT_SIZE(zt->nregs)) {
|
||||
*err = "register count does not match size";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
|
||||
{
|
||||
bool terminated = false;
|
||||
|
@ -117,6 +137,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
|
|||
struct extra_context *extra = NULL;
|
||||
struct sve_context *sve = NULL;
|
||||
struct za_context *za = NULL;
|
||||
struct zt_context *zt = NULL;
|
||||
struct _aarch64_ctx *head =
|
||||
(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
|
||||
void *extra_data = NULL;
|
||||
|
@ -177,6 +198,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
|
|||
za = (struct za_context *)head;
|
||||
new_flags |= ZA_CTX;
|
||||
break;
|
||||
case ZT_MAGIC:
|
||||
if (flags & ZT_CTX)
|
||||
*err = "Multiple ZT_MAGIC";
|
||||
/* Size is validated in validate_za_context() */
|
||||
zt = (struct zt_context *)head;
|
||||
new_flags |= ZT_CTX;
|
||||
break;
|
||||
case EXTRA_MAGIC:
|
||||
if (flags & EXTRA_CTX)
|
||||
*err = "Multiple EXTRA_MAGIC";
|
||||
|
@ -234,6 +262,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
|
|||
if (new_flags & ZA_CTX)
|
||||
if (!validate_za_context(za, err))
|
||||
return false;
|
||||
if (new_flags & ZT_CTX)
|
||||
if (!validate_zt_context(zt, err))
|
||||
return false;
|
||||
|
||||
flags |= new_flags;
|
||||
|
||||
|
@ -245,6 +276,11 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (terminated && (flags & ZT_CTX) && !(flags & ZA_CTX)) {
|
||||
*err = "ZT context but no ZA context";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define SVE_CTX (1 << 1)
|
||||
#define ZA_CTX (1 << 2)
|
||||
#define EXTRA_CTX (1 << 3)
|
||||
#define ZT_CTX (1 << 4)
|
||||
|
||||
#define KSFT_BAD_MAGIC 0xdeadbeef
|
||||
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2021 ARM Limited
|
||||
*
|
||||
* Verify that using an instruction not supported in streaming mode
|
||||
* traps when in streaming mode.
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
#include <ucontext.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "test_signals_utils.h"
|
||||
#include "testcases.h"
|
||||
|
||||
static union {
|
||||
ucontext_t uc;
|
||||
char buf[1024 * 128];
|
||||
} context;
|
||||
|
||||
int zt_no_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
|
||||
{
|
||||
size_t offset;
|
||||
struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
|
||||
|
||||
/*
|
||||
* Get a signal context which should not have a ZT frame and
|
||||
* registers in it.
|
||||
*/
|
||||
if (!get_current_context(td, &context.uc, sizeof(context)))
|
||||
return 1;
|
||||
|
||||
head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
|
||||
if (head) {
|
||||
fprintf(stderr, "Got unexpected ZT context\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
td->pass = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct tdescr tde = {
|
||||
.name = "ZT register data not present",
|
||||
.descr = "Validate that ZT is not present when ZA is disabled",
|
||||
.feats_required = FEAT_SME2,
|
||||
.timeout = 3,
|
||||
.sanity_disabled = true,
|
||||
.run = zt_no_regs_run,
|
||||
};
|
|
@ -0,0 +1,85 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2021 ARM Limited
|
||||
*
|
||||
* Verify that using an instruction not supported in streaming mode
|
||||
* traps when in streaming mode.
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
#include <ucontext.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "test_signals_utils.h"
|
||||
#include "testcases.h"
|
||||
|
||||
static union {
|
||||
ucontext_t uc;
|
||||
char buf[1024 * 128];
|
||||
} context;
|
||||
|
||||
static void enable_za(void)
|
||||
{
|
||||
/* smstart za; real data is TODO */
|
||||
asm volatile(".inst 0xd503457f" : : : );
|
||||
}
|
||||
|
||||
int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
|
||||
{
|
||||
size_t offset;
|
||||
struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
|
||||
struct zt_context *zt;
|
||||
char *zeros;
|
||||
|
||||
/*
|
||||
* Get a signal context which should have a ZT frame and registers
|
||||
* in it.
|
||||
*/
|
||||
enable_za();
|
||||
if (!get_current_context(td, &context.uc, sizeof(context)))
|
||||
return 1;
|
||||
|
||||
head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
|
||||
if (!head) {
|
||||
fprintf(stderr, "No ZT context\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
zt = (struct zt_context *)head;
|
||||
if (zt->nregs == 0) {
|
||||
fprintf(stderr, "Got context with no registers\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Got expected size %u for %d registers\n",
|
||||
head->size, zt->nregs);
|
||||
|
||||
/* We didn't load any data into ZT so it should be all zeros */
|
||||
zeros = malloc(ZT_SIG_REGS_SIZE(zt->nregs));
|
||||
if (!zeros) {
|
||||
fprintf(stderr, "Out of memory, nregs=%u\n", zt->nregs);
|
||||
return 1;
|
||||
}
|
||||
memset(zeros, 0, ZT_SIG_REGS_SIZE(zt->nregs));
|
||||
|
||||
if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
|
||||
ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
|
||||
fprintf(stderr, "ZT data invalid\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
free(zeros);
|
||||
|
||||
td->pass = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct tdescr tde = {
|
||||
.name = "ZT register data",
|
||||
.descr = "Validate that ZT is present and has data when ZA is enabled",
|
||||
.feats_required = FEAT_SME2,
|
||||
.timeout = 3,
|
||||
.sanity_disabled = true,
|
||||
.run = zt_regs_run,
|
||||
};
|
Loading…
Reference in New Issue