2017-11-08 00:58:13 +08:00
|
|
|
//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-11-08 00:58:13 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// AArch64 Scalable Vector Extension (SVE) Instruction definitions.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
let Predicates = [HasSVE] in {
|
2018-07-04 20:58:46 +08:00
|
|
|
|
|
|
|
// FFR access instructions: "rdffr", "rdffrs", "setffr" and "wrffr".
// RDFFR_PPz/RDFFRS_PPz share sve_int_rdffr_pred and differ only in the
// one-bit opcode argument; RDFFR_P uses the separate sve_int_rdffr_unpred class.
// NOTE(review): FFR is presumably the SVE first-fault register -- confirm.
def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
|
|
|
|
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
|
|
|
|
def RDFFR_P : sve_int_rdffr_unpred<"rdffr">;
|
|
|
|
def SETFFR : sve_int_setffr<"setffr">;
|
|
|
|
def WRFFR : sve_int_wrffr<"wrffr">;
|
|
|
|
|
2017-11-08 00:58:13 +08:00
|
|
|
// Integer arithmetic records built from sve_int_bin_cons_arit_0; the 3-bit
// opcode selects the operation named by the mnemonic string.
// NOTE(review): the _ZZZ suffix presumably denotes the unpredicated
// vector, vector, vector form, and sq*/uq* signed/unsigned saturation -- confirm.
defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
|
|
|
|
defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
|
2018-07-03 17:48:22 +08:00
|
|
|
defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd">;
|
|
|
|
defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd">;
|
|
|
|
defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">;
|
|
|
|
defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">;
|
2017-12-20 19:02:42 +08:00
|
|
|
|
2019-04-29 23:27:27 +08:00
|
|
|
// Unpredicated bitwise logical operations on data vectors,
// e.g. 'and z0.d, z1.d, z2.d'. The 2-bit opcode selects and/orr/eor/bic.
defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
|
|
|
|
defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
|
|
|
|
defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
|
|
|
|
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
|
2018-02-06 21:13:21 +08:00
|
|
|
|
2018-07-04 22:05:33 +08:00
|
|
|
// Predicated integer add/subtract, e.g. 'add z1.s, p0/m, z1.s, z2.s'
// (z1 = z1 + z2); the first source operand is also the destination.
// NOTE(review): "subr" is presumably the reversed-operand subtract -- confirm.
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">;
|
|
|
|
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">;
|
|
|
|
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr">;
|
2018-01-10 01:01:27 +08:00
|
|
|
|
[AArch64][SVE] Asm: Support for AND, ORR, EOR and BIC instructions.
This patch addresses the following variants:
- bitmask immediate, e.g. 'and z0.d, z0.d, #0x6'.
- unpredicated data vectors, e.g. 'and z0.d, z1.d, z2.d'.
- predicated data vectors, e.g. 'and z0.d, p0/m, z0.d, z1.d'.
And also several aliases, such as:
- ORN, alias of ORR.
- EON, alias of EOR.
- BIC, alias of AND (immediate variant)
- MOV, alias of ORR (if unpredicated and source register operands are the same)
Reviewers: rengolin, huntergr, fhahn, samparker, SjoerdMeijer, javed.absar
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D47363
llvm-svn: 333414
2018-05-29 21:08:43 +08:00
|
|
|
// Predicated bitwise logical operations on data vectors,
// e.g. 'and z0.d, p0/m, z0.d, z1.d'. The 3-bit opcode selects orr/eor/and/bic.
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr">;
|
|
|
|
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor">;
|
|
|
|
defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and">;
|
|
|
|
defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic">;
|
|
|
|
|
2018-05-29 18:39:49 +08:00
|
|
|
// Integer arithmetic records built from sve_int_arith_imm0; the 3-bit opcode
// selects the operation named by the mnemonic string.
// NOTE(review): the _ZI suffix presumably denotes the vector-with-immediate
// form -- confirm against sve_int_arith_imm0.
defm ADD_ZI : sve_int_arith_imm0<0b000, "add">;
|
|
|
|
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">;
|
2018-07-04 22:05:33 +08:00
|
|
|
defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr">;
|
2018-07-03 17:48:22 +08:00
|
|
|
defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd">;
|
|
|
|
defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd">;
|
|
|
|
defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">;
|
|
|
|
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">;
|
2018-05-29 18:39:49 +08:00
|
|
|
|
2018-07-17 23:41:58 +08:00
|
|
|
// "mad"/"msb" instantiate sve_int_mladdsub_vvv_pred and "mla"/"mls"
// instantiate sve_int_mlas_vvv_pred; within each pair the single opcode bit
// distinguishes the two mnemonics.
// NOTE(review): presumably the predicated integer multiply-add/subtract
// family -- confirm against the class definitions.
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">;
|
|
|
|
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">;
|
|
|
|
defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">;
|
|
|
|
defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">;
|
|
|
|
|
2018-07-27 22:24:55 +08:00
|
|
|
// SVE predicated integer reductions.
// "saddv" and "uaddv" each have a dedicated class; the min/max reductions
// share sve_int_reduce_1 and the bitwise reductions share sve_int_reduce_2.
|
|
|
|
defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">;
|
|
|
|
defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">;
|
|
|
|
defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">;
|
|
|
|
defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">;
|
|
|
|
defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">;
|
|
|
|
defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">;
|
|
|
|
defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">;
|
|
|
|
defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">;
|
|
|
|
defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for AND, ORR, EOR and BIC instructions.
This patch addresses the following variants:
- bitmask immediate, e.g. 'and z0.d, z0.d, #0x6'.
- unpredicated data vectors, e.g. 'and z0.d, z1.d, z2.d'.
- predicated data vectors, e.g. 'and z0.d, p0/m, z0.d, z1.d'.
And also several aliases, such as:
- ORN, alias of ORR.
- EON, alias of EOR.
- BIC, alias of AND (immediate variant)
- MOV, alias of ORR (if unpredicated and source register operands are the same)
Reviewers: rengolin, huntergr, fhahn, samparker, SjoerdMeijer, javed.absar
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D47363
llvm-svn: 333414
2018-05-29 21:08:43 +08:00
|
|
|
// Bitwise logical operations with a bitmask immediate,
// e.g. 'and z0.d, z0.d, #0x6'. The third argument is the alias mnemonic:
// ORN is an alias of ORR, EON of EOR, and BIC of AND (immediate variant).
defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">;
|
|
|
|
defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">;
|
|
|
|
defm AND_ZI : sve_int_log_imm<0b10, "and", "bic">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for signed/unsigned MIN/MAX/ABD
This patch implements the following varieties:
- Unpredicated signed max, e.g. smax z0.h, z1.h, #-128
- Unpredicated signed min, e.g. smin z0.h, z1.h, #-128
- Unpredicated unsigned max, e.g. umax z0.h, z1.h, #255
- Unpredicated unsigned min, e.g. umin z0.h, z1.h, #255
- Predicated signed max, e.g. smax z0.h, p0/m, z0.h, z1.h
- Predicated signed min, e.g. smin z0.h, p0/m, z0.h, z1.h
- Predicated signed abd, e.g. sabd z0.h, p0/m, z0.h, z1.h
- Predicated unsigned max, e.g. umax z0.h, p0/m, z0.h, z1.h
- Predicated unsigned min, e.g. umin z0.h, p0/m, z0.h, z1.h
- Predicated unsigned abd, e.g. uabd z0.h, p0/m, z0.h, z1.h
llvm-svn: 336317
2018-07-05 15:54:10 +08:00
|
|
|
// Unpredicated signed/unsigned min/max with immediate,
// e.g. 'smax z0.h, z1.h, #-128' and 'umax z0.h, z1.h, #255'.
// The signed forms take a simm8 operand, the unsigned forms imm0_255.
defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", simm8>;
|
|
|
|
defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", simm8>;
|
|
|
|
defm UMAX_ZI : sve_int_arith_imm1<0b01, "umax", imm0_255>;
|
|
|
|
defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;
|
|
|
|
|
[AArch64][SVE] Asm: Support for integer MUL instructions.
This patch adds the following instructions:
MUL - multiply vectors, e.g.
mul z0.h, p0/m, z0.h, z1.h
- multiply with immediate, e.g.
mul z0.h, z0.h, #127
SMULH - signed multiply returning high half, e.g.
smulh z0.h, p0/m, z0.h, z1.h
UMULH - unsigned multiply returning high half, e.g.
umulh z0.h, p0/m, z0.h, z1.h
llvm-svn: 337358
2018-07-18 16:10:03 +08:00
|
|
|
// Integer multiplies:
//   MUL_ZI     - multiply with immediate, e.g. 'mul z0.h, z0.h, #127'
//   MUL_ZPmZ   - multiply vectors, e.g. 'mul z0.h, p0/m, z0.h, z1.h'
//   SMULH/UMULH - signed/unsigned multiply returning high half,
//                 e.g. 'smulh z0.h, p0/m, z0.h, z1.h'
defm MUL_ZI : sve_int_arith_imm2<"mul">;
|
|
|
|
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul">;
|
|
|
|
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh">;
|
|
|
|
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh">;
|
|
|
|
|
[AArch64][SVE] Asm: Integer divide instructions.
This patch adds the following predicated instructions:
UDIV Unsigned divide active elements
UDIVR Unsigned divide active elements, reverse form.
SDIV Signed divide active elements
SDIVR Signed divide active elements, reverse form.
e.g.
udiv z0.s, p0/m, z0.s, z1.s
(unsigned divide active elements in z0 by z1, store result in z0)
sdivr z0.s, p0/m, z0.s, z1.s
(signed divide active elements in z1 by z0, store result in z0)
llvm-svn: 337369
2018-07-18 17:17:29 +08:00
|
|
|
// Predicated integer divides of active elements. The *R mnemonics are the
// reverse forms: 'udiv z0.s, p0/m, z0.s, z1.s' divides z0 by z1, whereas
// 'sdivr z0.s, p0/m, z0.s, z1.s' divides z1 by z0; both store to z0.
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv">;
|
|
|
|
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv">;
|
|
|
|
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
|
|
|
|
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for UDOT/SDOT instructions.
The signed/unsigned DOT instructions perform a dot-product on
quadtuplets from two source vectors and accumulate the result in
the destination register. The instructions come in two forms:
Vector form, e.g.
sdot z0.s, z1.b, z2.b - signed dot product on four 8-bit quad-tuplets,
accumulating results in 32-bit elements.
udot z0.d, z1.h, z2.h - unsigned dot product on four 16-bit quad-tuplets,
accumulating results in 64-bit elements.
Indexed form, e.g.
sdot z0.s, z1.b, z2.b[3] - signed dot product on four 8-bit quad-tuplets
with specified quadtuplet from second
source vector, accumulating results in 32-bit
elements.
udot z0.d, z1.h, z2.h[1] - dot product on four 16-bit quad-tuplets
with specified quadtuplet from second
source vector, accumulating results in 64-bit
elements.
llvm-svn: 337372
2018-07-18 17:37:51 +08:00
|
|
|
// Signed/unsigned dot product, accumulating into the destination register.
// Vector form, e.g. 'sdot z0.s, z1.b, z2.b'. The opcode bit selects
// signed (0) or unsigned (1).
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
|
|
|
|
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
|
|
|
|
|
|
|
|
|
|
// Indexed form, e.g. 'sdot z0.s, z1.b, z2.b[3]': the index selects the
// quadtuplet taken from the second source vector.
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
|
|
|
|
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for predicated unary operations.
The patch includes support for the following instructions:
ABS z0.h, p0/m, z0.h
NEG z0.h, p0/m, z0.h
(S|U)XTB z0.h, p0/m, z0.h
(S|U)XTB z0.s, p0/m, z0.s
(S|U)XTB z0.d, p0/m, z0.d
(S|U)XTH z0.s, p0/m, z0.s
(S|U)XTH z0.d, p0/m, z0.d
(S|U)XTW z0.d, p0/m, z0.d
llvm-svn: 336204
2018-07-03 22:57:48 +08:00
|
|
|
// Predicated unary operations, e.g. 'abs z0.h, p0/m, z0.h'.
// The extend records use the _h, _w and _d class variants for the
// (s|u)xtb, (s|u)xth and (s|u)xtw mnemonics respectively.
defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
|
|
|
|
defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
|
|
|
|
defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">;
|
|
|
|
defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">;
|
|
|
|
defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">;
|
|
|
|
defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">;
|
2019-09-09 19:21:14 +08:00
|
|
|
// ABS and NEG additionally pass an intrinsic (int_aarch64_sve_abs/neg) as the
// third template argument.
// NOTE(review): presumably used for instruction selection -- confirm.
defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>;
|
|
|
|
defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>;
|
[AArch64][SVE] Asm: Support for predicated unary operations.
The patch includes support for the following instructions:
ABS z0.h, p0/m, z0.h
NEG z0.h, p0/m, z0.h
(S|U)XTB z0.h, p0/m, z0.h
(S|U)XTB z0.s, p0/m, z0.s
(S|U)XTB z0.d, p0/m, z0.d
(S|U)XTH z0.s, p0/m, z0.s
(S|U)XTH z0.d, p0/m, z0.d
(S|U)XTW z0.d, p0/m, z0.d
llvm-svn: 336204
2018-07-03 22:57:48 +08:00
|
|
|
|
2018-07-10 22:05:55 +08:00
|
|
|
// Unary operations built from sve_int_un_pred_arit_1. "fabs" and "fneg" use
// the _fp variant of the class but draw from the same 3-bit opcode sequence
// (0b100, 0b101).
// NOTE(review): the _ZPmZ suffix presumably denotes the predicated (merging)
// form -- confirm.
defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">;
|
|
|
|
defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">;
|
|
|
|
defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">;
|
|
|
|
defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">;
|
|
|
|
defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">;
|
|
|
|
defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
|
|
|
|
defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for signed/unsigned MIN/MAX/ABD
This patch implements the following varieties:
- Unpredicated signed max, e.g. smax z0.h, z1.h, #-128
- Unpredicated signed min, e.g. smin z0.h, z1.h, #-128
- Unpredicated unsigned max, e.g. umax z0.h, z1.h, #255
- Unpredicated unsigned min, e.g. umin z0.h, z1.h, #255
- Predicated signed max, e.g. smax z0.h, p0/m, z0.h, z1.h
- Predicated signed min, e.g. smin z0.h, p0/m, z0.h, z1.h
- Predicated signed abd, e.g. sabd z0.h, p0/m, z0.h, z1.h
- Predicated unsigned max, e.g. umax z0.h, p0/m, z0.h, z1.h
- Predicated unsigned min, e.g. umin z0.h, p0/m, z0.h, z1.h
- Predicated unsigned abd, e.g. uabd z0.h, p0/m, z0.h, z1.h
llvm-svn: 336317
2018-07-05 15:54:10 +08:00
|
|
|
// Predicated signed/unsigned max, min and abd,
// e.g. 'smax z0.h, p0/m, z0.h, z1.h' and 'uabd z0.h, p0/m, z0.h, z1.h'.
defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax">;
|
|
|
|
defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax">;
|
|
|
|
defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin">;
|
|
|
|
defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin">;
|
|
|
|
defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd">;
|
|
|
|
defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd">;
|
|
|
|
|
2018-07-27 20:26:24 +08:00
|
|
|
// "frecpe" and "frsqrte", both built from sve_fp_2op_u_zd.
// NOTE(review): presumably the unpredicated FP reciprocal and reciprocal
// square-root estimates -- confirm.
defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">;
|
|
|
|
defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;
|
|
|
|
|
2018-06-15 21:57:51 +08:00
|
|
|
// FP arithmetic records built from sve_fp_2op_i_p_zds. The third argument
// is the immediate operand class: sve_fpimm_half_one for fadd/fsub/fsubr,
// sve_fpimm_half_two for fmul, and sve_fpimm_zero_one for the min/max forms.
// NOTE(review): each operand class presumably admits only the two constants
// in its name (e.g. 0.5 and 1.0) -- confirm against the operand definitions.
defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
|
2018-07-17 20:36:08 +08:00
|
|
|
defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
|
2018-06-15 21:57:51 +08:00
|
|
|
defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>;
|
2018-07-17 20:36:08 +08:00
|
|
|
defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>;
|
|
|
|
defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>;
|
|
|
|
defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>;
|
2018-06-15 21:57:51 +08:00
|
|
|
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
|
2018-07-17 20:36:08 +08:00
|
|
|
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
|
2018-06-15 21:57:51 +08:00
|
|
|
|
2018-07-17 17:48:57 +08:00
|
|
|
// FP binary records built from sve_fp_2op_p_zds with a 4-bit opcode. The
// low eight opcode values pair with the same mnemonics, in the same order,
// as the 3-bit opcodes of the *_ZPmI records. Opcode 0b1011 is not used here.
// NOTE(review): the _ZPmZ suffix presumably denotes the predicated (merging)
// vector form -- confirm.
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd">;
|
|
|
|
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub">;
|
|
|
|
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul">;
|
|
|
|
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr">;
|
|
|
|
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm">;
|
|
|
|
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm">;
|
|
|
|
defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax">;
|
|
|
|
defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin">;
|
|
|
|
defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd">;
|
|
|
|
defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale">;
|
|
|
|
defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx">;
|
|
|
|
defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">;
|
|
|
|
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">;
|
|
|
|
|
2018-07-18 19:59:12 +08:00
|
|
|
// FP three-operand records built from sve_fp_3op_u_zd.
// NOTE(review): the _ZZZ suffix presumably denotes the unpredicated
// vector, vector, vector form -- confirm.
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">;
|
|
|
|
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">;
|
|
|
|
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">;
|
|
|
|
defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">;
|
|
|
|
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">;
|
|
|
|
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">;
|
|
|
|
|
|
|
2018-07-27 20:40:09 +08:00
|
|
|
// "ftssel" uses its own class and takes no opcode argument.
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for FP Complex ADD/MLA.
The variants added in this patch are:
- Predicated Complex floating point ADD with rotate, e.g.
fcadd z0.h, p0/m, z0.h, z1.h, #90
- Predicated Complex floating point MLA with rotate, e.g.
fcmla z0.h, p0/m, z1.h, z2.h, #180
- Unpredicated Complex floating point MLA with rotate (indexed operand), e.g.
fcmla z0.h, z1.h, z2.h[0], #180
Reviewers: rengolin, fhahn, SjoerdMeijer, samparker, javed.absar
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D48824
llvm-svn: 336210
2018-07-04 00:01:27 +08:00
|
|
|
// Predicated complex floating point operations with rotate:
//   FCADD, e.g. 'fcadd z0.h, p0/m, z0.h, z1.h, #90'
//   FCMLA, e.g. 'fcmla z0.h, p0/m, z1.h, z2.h, #180'
defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">;
|
|
|
|
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">;
|
2018-07-03 23:31:04 +08:00
|
|
|
|
2018-07-17 21:58:46 +08:00
|
|
|
// FP multiply-accumulate records in two parallel groups with matching 2-bit
// opcodes: fmla/fmls/fnmla/fnmls use sve_fp_3op_p_zds_a and
// fmad/fmsb/fnmad/fnmsb use sve_fp_3op_p_zds_b.
// NOTE(review): the _a/_b classes presumably differ in which operand is
// overwritten (accumulator vs. multiplicand) -- confirm against the classes.
defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">;
|
|
|
|
defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">;
|
|
|
|
defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">;
|
|
|
|
defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">;
|
|
|
|
|
|
|
|
|
|
defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">;
|
|
|
|
defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">;
|
|
|
|
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">;
|
|
|
|
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">;
|
|
|
|
|
2018-07-20 16:47:26 +08:00
|
|
|
// "ftmad", built from its own sve_fp_ftmad class.
// NOTE(review): the _ZZI suffix suggests vector, vector, immediate
// operands -- confirm.
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">;
|
|
|
|
|
2018-07-17 21:58:46 +08:00
|
|
|
// "fmla"/"fmls" built from sve_fp_fma_by_indexed_elem; the opcode bit
// selects fmla (0) or fmls (1).
// NOTE(review): presumably the second multiplicand is an indexed vector
// element, as the class name suggests -- confirm.
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">;
|
|
|
|
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for FP Complex ADD/MLA.
The variants added in this patch are:
- Predicated Complex floating point ADD with rotate, e.g.
fcadd z0.h, p0/m, z0.h, z1.h, #90
- Predicated Complex floating point MLA with rotate, e.g.
fcmla z0.h, p0/m, z1.h, z2.h, #180
- Unpredicated Complex floating point MLA with rotate (indexed operand), e.g.
fcmla z0.h, z1.h, z2.h[0], #180
Reviewers: rengolin, fhahn, SjoerdMeijer, samparker, javed.absar
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D48824
llvm-svn: 336210
2018-07-04 00:01:27 +08:00
|
|
|
// Unpredicated complex floating point MLA with rotate, indexed operand form.
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">;
|
2018-07-03 23:31:04 +08:00
|
|
|
// "fmul" built from sve_fp_fmul_by_indexed_elem.
// NOTE(review): presumably the by-indexed-element FP multiply -- confirm.
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">;
|
|
|
|
|
2018-07-27 21:58:48 +08:00
|
|
|
// SVE floating point reductions.
// "fadda" uses sve_fp_2op_p_vd; every other reduction here uses
// sve_fp_fast_red. FADDA_VPZ and FADDV_VPZ both pass opcode 0b000 but to
// different classes.
|
|
|
|
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;
|
|
|
|
defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv">;
|
|
|
|
defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv">;
|
|
|
|
defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv">;
|
|
|
|
defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv">;
|
|
|
|
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv">;
|
|
|
|
|
2018-05-25 17:47:52 +08:00
|
|
|
// Splat immediate (unpredicated)
// Three immediate kinds, each with its own class: "dup" (sve_int_dup_imm),
// "fdup" (sve_int_dup_fpimm) and "dupm" (sve_int_dup_mask_imm).
|
|
|
|
defm DUP_ZI : sve_int_dup_imm<"dup">;
|
2018-06-01 20:54:46 +08:00
|
|
|
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
|
2018-06-01 15:25:46 +08:00
|
|
|
defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">;
|
2018-05-25 17:47:52 +08:00
|
|
|
|
2018-06-04 13:40:46 +08:00
|
|
|
// Splat immediate (predicated)
// "cpy" has separate merge (CPY_ZPmI) and zero (CPY_ZPzI) records; "fcpy"
// has a single record using sve_int_dup_fpimm_pred.
|
2018-06-04 13:58:06 +08:00
|
|
|
defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">;
|
|
|
|
defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">;
|
|
|
|
defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;
|
2018-06-04 13:40:46 +08:00
|
|
|
|
2018-06-04 14:40:55 +08:00
|
|
|
// Splat scalar register (unpredicated, GPR or vector + element index)
// NOTE(review): DUP_ZR is presumably the GPR form and DUP_ZZI the
// vector + element index form -- confirm against the class definitions.
|
|
|
|
defm DUP_ZR : sve_int_perm_dup_r<"dup">;
|
|
|
|
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
|
|
|
|
|
2018-06-16 00:39:46 +08:00
|
|
|
// Splat scalar register (predicated)
// NOTE(review): the _r/_v class suffixes presumably select a general-purpose
// vs. SIMD&FP scalar source register -- confirm.
|
|
|
|
defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy">;
|
|
|
|
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy">;
|
|
|
|
|
2018-06-17 18:11:04 +08:00
|
|
|
// Select elements from either vector (predicated)
// This is the data-vector "sel"; the file also defines a separate
// predicate-register record, SEL_PPPP.
|
|
|
|
defm SEL_ZPZZ : sve_int_sel_vvv<"sel">;
|
|
|
|
|
2018-07-17 16:52:45 +08:00
|
|
|
// Permute records: "splice", "compact", "insr" and "ext".
// INSR has two records with the same mnemonic but different classes
// (sve_int_perm_insrs / sve_int_perm_insrv).
// NOTE(review): the two INSR records presumably differ in source register
// kind (scalar GPR vs. SIMD&FP scalar) -- confirm.
defm SPLICE_ZPZ : sve_int_perm_splice<"splice">;
|
2018-07-11 19:22:26 +08:00
|
|
|
defm COMPACT_ZPZ : sve_int_perm_compact<"compact">;
|
2018-07-13 16:51:57 +08:00
|
|
|
defm INSR_ZR : sve_int_perm_insrs<"insr">;
|
|
|
|
defm INSR_ZV : sve_int_perm_insrv<"insr">;
|
2018-07-17 16:39:48 +08:00
|
|
|
// EXT is a plain def (single record), unlike the defm multiclasses around it.
def EXT_ZZI : sve_int_perm_extract_i<"ext">;
|
2018-07-11 19:22:26 +08:00
|
|
|
|
[AArch64][SVE] Asm: Support for bit/byte reverse operations.
This patch adds the following instructions:
RBIT reverse bits within each active element (predicated), e.g.
rbit z0.d, p0/m, z1.d
for 8, 16, 32 and 64 bit elements.
REV reverse order of elements in data/predicate vector
(unpredicated), e.g.
rev z0.d, z1.d
rev p0.d, p1.d
for 8, 16, 32 and 64 bit elements.
REVB reverse order of bytes within each active element, e.g.
revb z0.d, p0/m, z1.d
for 16, 32 and 64 bit elements.
REVH reverse order of 16-bit half-words within each active
element, e.g.
revh z0.d, p0/m, z1.d
for 32 and 64 bit elements.
REVW reverse order of 32-bit words within each active element,
e.g.
revw z0.d, p0/m, z1.d
for 64 bit elements.
llvm-svn: 337534
2018-07-20 17:00:44 +08:00
|
|
|
// Predicated reversal within each active element:
//   RBIT - reverse bits, for 8, 16, 32 and 64 bit elements
//   REVB - reverse bytes, for 16, 32 and 64 bit elements
//   REVH - reverse 16-bit half-words, for 32 and 64 bit elements
//   REVW - reverse 32-bit words, for 64 bit elements
// e.g. 'revb z0.d, p0/m, z1.d'.
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">;
|
|
|
|
defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb">;
|
|
|
|
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh">;
|
|
|
|
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw">;
|
|
|
|
|
|
|
|
|
|
// Unpredicated REV: reverse the order of elements in a predicate (REV_PP,
// e.g. 'rev p0.d, p1.d') or data (REV_ZZ, e.g. 'rev z0.d, z1.d') vector.
defm REV_PP : sve_int_perm_reverse_p<"rev">;
|
|
|
|
defm REV_ZZ : sve_int_perm_reverse_z<"rev">;
|
|
|
|
|
2018-07-13 17:25:43 +08:00
|
|
|
// Unpack records. In the 2-bit opcode of the vector forms, the high bit
// separates s* (0) from u* (1) and the low bit separates *lo (0) from *hi (1).
// NOTE(review): presumably signed/unsigned widening of the low/high half of
// the source vector -- confirm.
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">;
|
|
|
|
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">;
|
|
|
|
defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">;
|
|
|
|
defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">;
|
|
|
|
|
|
|
|
|
|
// Predicate-register counterparts (plain defs); the single opcode bit selects
// lo (0) or hi (1).
def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
|
|
|
|
def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
|
|
|
|
|
[AArch64][SVE] Asm: Add MOVPRFX instructions.
This patch adds predicated and unpredicated MOVPRFX instructions, which
can be prepended to SVE instructions that are destructive on their first
source operand, to make them a constructive operation, e.g.
add z1.s, p0/m, z1.s, z2.s <=> z1 = z1 + z2
can be made constructive:
movprfx z0, z1
add z0.s, p0/m, z0.s, z2.s <=> z0 = z1 + z2
The predicated MOVPRFX instruction can additionally be used to zero
inactive elements, e.g.
movprfx z0.s, p0/z, z1.s
add z0.s, p0/m, z0.s, z2.s
Not all instructions can be prefixed with the MOVPRFX instruction
which is why this patch also adds a mechanism to validate prefixed
instructions. The exact rules for when a MOVPRFX applies are detailed in
the SVE supplement of the Architectural Reference Manual.
This is patch [1/2] in a series to add MOVPRFX instructions:
- Patch [1/2]: https://reviews.llvm.org/D49592
- Patch [2/2]: https://reviews.llvm.org/D49593
Reviewers: rengolin, SjoerdMeijer, samparker, fhahn, javed.absar
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D49592
llvm-svn: 338258
2018-07-30 23:42:46 +08:00
|
|
|
// MOVPRFX can be prepended to an SVE instruction that is destructive on its
// first source operand to make it constructive, e.g.
//   movprfx z0, z1
//   add     z0.s, p0/m, z0.s, z2.s   <=> z0 = z1 + z2
// The predicated zeroing form ('movprfx z0.s, p0/z, z1.s') additionally zeroes
// inactive elements. Not every instruction may be prefixed.
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
|
|
|
|
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
|
|
|
|
// The unpredicated form shares sve_int_bin_cons_misc_0_c with FEXPA and takes
// the ZPRAny register operand.
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
|
2018-07-27 20:40:09 +08:00
|
|
|
// One FEXPA record per ZPR16/ZPR32/ZPR64 operand class; the three 8-bit
// opcode arguments differ only in their top two bits (01, 10, 11).
def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
|
|
|
|
def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
|
|
|
|
def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
|
|
|
|
|
2018-07-28 22:04:52 +08:00
|
|
|
// BRKP* records: in the 2-bit opcode the low bit selects a (0) / b (1) and the
// high bit selects the mnemonic with the trailing 's'.
// NOTE(review): the 's' variants are presumably the flag-setting forms --
// confirm.
def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
|
|
|
|
def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
|
|
|
|
def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">;
|
|
|
|
def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">;
|
|
|
|
|
|
|
|
|
|
def BRKN_PPzP : sve_int_brkn<0b0, "brkn">;
|
|
|
|
def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">;
|
|
|
|
|
|
|
|
|
|
// BRKA/BRKB have both _z (sve_int_break_z) and _m (sve_int_break_m) records;
// BRKAS/BRKBS have only the _z record here.
defm BRKA_PPzP : sve_int_break_z<0b000, "brka">;
|
|
|
|
defm BRKA_PPmP : sve_int_break_m<0b001, "brka">;
|
|
|
|
defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">;
|
|
|
|
defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">;
|
|
|
|
defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">;
|
|
|
|
defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">;
|
|
|
|
|
2018-07-28 22:18:11 +08:00
|
|
|
// Predicate-register utility records: "ptest", "pfalse", "pfirst", "pnext".
// PTEST and PFALSE are single records (def) with 6-bit opcodes; PFIRST and
// PNEXT are multiclass instantiations (defm) with 5-bit opcodes.
def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
|
|
|
|
def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
|
2018-07-29 16:00:16 +08:00
|
|
|
defm PFIRST : sve_int_pfirst<0b00000, "pfirst">;
|
|
|
|
defm PNEXT : sve_int_pnext<0b00110, "pnext">;
|
2018-07-28 22:18:11 +08:00
|
|
|
|
[AArch64][SVE] Asm: Support for bitwise operations on predicate vectors.
This patch adds support for instructions performing bitwise operations
on predicate vectors, including AND, BIC, EOR, NAND, NOR, ORN, ORR, and
their status flag setting variants ANDS, BICS, EORS, NANDS, ORNS, ORRS.
This patch also adds several aliases:
orr p0.b, p1/z, p1.b, p1.b => mov p0.b, p1.b
orrs p0.b, p1/z, p1.b, p1.b => movs p0.b, p1.b
and p0.b, p1/z, p2.b, p2.b => mov p0.b, p1/z, p2.b
ands p0.b, p1/z, p2.b, p2.b => movs p0.b, p1/z, p2.b
eor p0.b, p1/z, p2.b, p1.b => not p0.b, p1/z, p2.b
eors p0.b, p1/z, p2.b, p1.b => nots p0.b, p1/z, p2.b
llvm-svn: 334906
2018-06-17 18:48:21 +08:00
|
|
|
// Bitwise operations on predicate vectors, all built from sve_int_pred_log
// with a 4-bit opcode. Aliases include:
//   and p0.b, p1/z, p2.b, p2.b  =>  mov p0.b, p1/z, p2.b
//   eor p0.b, p1/z, p2.b, p1.b  =>  not p0.b, p1/z, p2.b
def AND_PPzPP : sve_int_pred_log<0b0000, "and">;
|
|
|
|
def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">;
|
|
|
|
def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">;
|
2018-06-17 18:11:04 +08:00
|
|
|
// SEL shares the class and takes the next opcode value (0b0011).
def SEL_PPPP : sve_int_pred_log<0b0011, "sel">;
|
[AArch64][SVE] Asm: Support for bitwise operations on predicate vectors.
This patch adds support for instructions performing bitwise operations
on predicate vectors, including AND, BIC, EOR, NAND, NOR, ORN, ORR, and
their status flag setting variants ANDS, BICS, EORS, NANDS, ORNS, ORRS.
This patch also adds several aliases:
orr p0.b, p1/z, p1.b, p1.b => mov p0.b, p1.b
orrs p0.b, p1/z, p1.b, p1.b => movs p0.b, p1.b
and p0.b, p1/z, p2.b, p2.b => mov p0.b, p1/z, p2.b
ands p0.b, p1/z, p2.b, p2.b => movs p0.b, p1/z, p2.b
eor p0.b, p1/z, p2.b, p1.b => not p0.b, p1/z, p2.b
eors p0.b, p1/z, p2.b, p1.b => nots p0.b, p1/z, p2.b
llvm-svn: 334906
2018-06-17 18:48:21 +08:00
|
|
|
// Remaining predicate-vector bitwise operations and the status flag setting
// variants (mnemonics ending in 's'). For every pair defined here, the
// flag-setting opcode is the base opcode with bit 2 set (e.g. and=0b0000 /
// ands=0b0100, orr=0b1000 / orrs=0b1100). Opcode 0b0111 is not used.
// Aliases include:
//   orr  p0.b, p1/z, p1.b, p1.b  =>  mov  p0.b, p1.b
//   orrs p0.b, p1/z, p1.b, p1.b  =>  movs p0.b, p1.b
def ANDS_PPzPP : sve_int_pred_log<0b0100, "ands">;
|
|
|
|
def BICS_PPzPP : sve_int_pred_log<0b0101, "bics">;
|
|
|
|
def EORS_PPzPP : sve_int_pred_log<0b0110, "eors">;
|
|
|
|
def ORR_PPzPP : sve_int_pred_log<0b1000, "orr">;
|
|
|
|
def ORN_PPzPP : sve_int_pred_log<0b1001, "orn">;
|
|
|
|
def NOR_PPzPP : sve_int_pred_log<0b1010, "nor">;
|
|
|
|
def NAND_PPzPP : sve_int_pred_log<0b1011, "nand">;
|
|
|
|
def ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs">;
|
|
|
|
def ORNS_PPzPP : sve_int_pred_log<0b1101, "orns">;
|
|
|
|
def NORS_PPzPP : sve_int_pred_log<0b1110, "nors">;
|
|
|
|
def NANDS_PPzPP : sve_int_pred_log<0b1111, "nands">;
|
2018-06-17 18:11:04 +08:00
|
|
|
|
[AArch64][SVE] Asm: Support for LAST(A|B) and CLAST(A|B) instructions.
The LASTB and LASTA instructions extract the last active element,
or element after the last active, from the source vector.
The added variants are:
Scalar:
last(a|b) w0, p0, z0.b
last(a|b) w0, p0, z0.h
last(a|b) w0, p0, z0.s
last(a|b) x0, p0, z0.d
SIMD & FP Scalar:
last(a|b) b0, p0, z0.b
last(a|b) h0, p0, z0.h
last(a|b) s0, p0, z0.s
last(a|b) d0, p0, z0.d
The CLASTB and CLASTA instructions conditionally extract the last active
element, or the element after the last active element, from the source vector.
The added variants are:
Scalar:
clast(a|b) w0, p0, w0, z0.b
clast(a|b) w0, p0, w0, z0.h
clast(a|b) w0, p0, w0, z0.s
clast(a|b) x0, p0, x0, z0.d
SIMD & FP Scalar:
clast(a|b) b0, p0, b0, z0.b
clast(a|b) h0, p0, h0, z0.h
clast(a|b) s0, p0, s0, z0.s
clast(a|b) d0, p0, d0, z0.d
Vector:
clast(a|b) z0.b, p0, z0.b, z1.b
clast(a|b) z0.h, p0, z0.h, z1.h
clast(a|b) z0.s, p0, z0.s, z1.s
clast(a|b) z0.d, p0, z0.d, z1.d
Please refer to the architecture specification for more details on
the semantics of the added instructions.
llvm-svn: 336783
2018-07-11 18:08:00 +08:00
|
|
|
// CLASTA/CLASTB conditionally extract an element from the source vector.
// The first argument selects the A (0) or B (1) variant. Variants are scalar
// (e.g. 'clasta w0, p0, w0, z0.b'), SIMD & FP scalar
// (e.g. 'clasta b0, p0, b0, z0.b') and vector
// (e.g. 'clasta z0.b, p0, z0.b, z1.b').
// NOTE(review): _RPZ/_VPZ/_ZPZ presumably map to those three variants in that
// order -- confirm against the class definitions.
defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">;
|
|
|
|
defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">;
|
|
|
|
defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta">;
|
|
|
|
defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb">;
|
|
|
|
defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta">;
|
|
|
|
defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb">;
|
|
|
|
|
|
|
|
|
|
// LASTA/LASTB are the unconditional extracts, in scalar
// (e.g. 'lasta w0, p0, z0.b') and SIMD & FP scalar
// (e.g. 'lasta b0, p0, z0.b') variants only.
defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta">;
|
|
|
|
defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb">;
|
|
|
|
defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta">;
|
|
|
|
defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb">;
|
|
|
|
|
2018-04-13 22:41:36 +08:00
|
|
|
// contiguous load with reg+immediate
|
|
|
|
// One record per (mnemonic, vector element type) combination; the 4-bit
// first argument takes every value from 0b0000 to 0b1111 exactly once.
// The Z_<t> list operand and ZPR<n> register class always agree
// (Z_b/ZPR8, Z_h/ZPR16, Z_s/ZPR32, Z_d/ZPR64).
// NOTE(review): the "ld1s*" mnemonics are presumably the sign-extending
// loads -- confirm.
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
|
|
|
|
defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
|
|
|
|
defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>;
|
|
|
|
defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>;
|
|
|
|
defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>;
|
|
|
|
defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>;
|
|
|
|
defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>;
|
|
|
|
defm LD1H_D_IMM : sve_mem_cld_si<0b0111, "ld1h", Z_d, ZPR64>;
|
|
|
|
defm LD1SH_D_IMM : sve_mem_cld_si<0b1000, "ld1sh", Z_d, ZPR64>;
|
|
|
|
defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>;
|
|
|
|
defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>;
|
|
|
|
defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>;
|
|
|
|
defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>;
|
|
|
|
defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>;
|
|
|
|
defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>;
|
|
|
|
defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>;
|
[AArch64][SVE] Asm: Support for contiguous ST1 (scalar+imm) store instructions.
Summary:
Added instructions for contiguous stores, ST1, with scalar+imm addressing
modes and corresponding tests. The patch also adds parsing of
'mul vl' as needed for the VL-scaled immediate.
This is patch [6/6] in a series to add assembler/disassembler support for
SVE's contiguous ST1 (scalar+imm) instructions.
Reviewers: fhahn, rengolin, javed.absar, huntergr, SjoerdMeijer, t.p.northover, echristo, evandro
Reviewed By: rengolin
Subscribers: tschuett, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D45432
llvm-svn: 330014
2018-04-13 20:56:14 +08:00
|
|
|
|
2018-05-08 18:46:55 +08:00
|
|
|
// LD1R loads (splat scalar to vector)
// The two 2-bit arguments together take every value from (0b00, 0b00) to
// (0b11, 0b11) exactly once. The immediate operand class follows the
// mnemonic: *b -> uimm6s1, *h -> uimm6s2, *w -> uimm6s4, *d -> uimm6s8.
// NOTE(review): the sN suffix is presumably the immediate's scale in bytes --
// confirm against the operand definitions.
|
|
|
|
defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>;
|
|
|
|
defm LD1RB_H_IMM : sve_mem_ld_dup<0b00, 0b01, "ld1rb", Z_h, ZPR16, uimm6s1>;
|
|
|
|
defm LD1RB_S_IMM : sve_mem_ld_dup<0b00, 0b10, "ld1rb", Z_s, ZPR32, uimm6s1>;
|
|
|
|
defm LD1RB_D_IMM : sve_mem_ld_dup<0b00, 0b11, "ld1rb", Z_d, ZPR64, uimm6s1>;
|
|
|
|
defm LD1RSW_IMM : sve_mem_ld_dup<0b01, 0b00, "ld1rsw", Z_d, ZPR64, uimm6s4>;
|
|
|
|
defm LD1RH_IMM : sve_mem_ld_dup<0b01, 0b01, "ld1rh", Z_h, ZPR16, uimm6s2>;
|
|
|
|
defm LD1RH_S_IMM : sve_mem_ld_dup<0b01, 0b10, "ld1rh", Z_s, ZPR32, uimm6s2>;
|
|
|
|
defm LD1RH_D_IMM : sve_mem_ld_dup<0b01, 0b11, "ld1rh", Z_d, ZPR64, uimm6s2>;
|
|
|
|
defm LD1RSH_D_IMM : sve_mem_ld_dup<0b10, 0b00, "ld1rsh", Z_d, ZPR64, uimm6s2>;
|
|
|
|
defm LD1RSH_S_IMM : sve_mem_ld_dup<0b10, 0b01, "ld1rsh", Z_s, ZPR32, uimm6s2>;
|
|
|
|
defm LD1RW_IMM : sve_mem_ld_dup<0b10, 0b10, "ld1rw", Z_s, ZPR32, uimm6s4>;
|
|
|
|
defm LD1RW_D_IMM : sve_mem_ld_dup<0b10, 0b11, "ld1rw", Z_d, ZPR64, uimm6s4>;
|
|
|
|
defm LD1RSB_D_IMM : sve_mem_ld_dup<0b11, 0b00, "ld1rsb", Z_d, ZPR64, uimm6s1>;
|
|
|
|
defm LD1RSB_S_IMM : sve_mem_ld_dup<0b11, 0b01, "ld1rsb", Z_s, ZPR32, uimm6s1>;
|
|
|
|
defm LD1RSB_H_IMM : sve_mem_ld_dup<0b11, 0b10, "ld1rsb", Z_h, ZPR16, uimm6s1>;
|
|
|
|
defm LD1RD_IMM : sve_mem_ld_dup<0b11, 0b11, "ld1rd", Z_d, ZPR64, uimm6s8>;
|
|
|
|
|
|
|
|
// LD1RQ loads (load quadword-vector and splat to scalable vector)
// Each element size (b/h/w/d, 2-bit argument 0b00..0b11) has an _IMM record
// (sve_mem_ldqr_si) and a record without the _IMM suffix (sve_mem_ldqr_ss)
// that takes an extra GPR64NoXZRshifted<N> operand matching the element size.
|
2018-05-02 16:49:08 +08:00
|
|
|
defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>;
|
|
|
|
defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, ZPR16>;
|
|
|
|
defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>;
|
|
|
|
defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>;
|
|
|
|
defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>;
|
|
|
|
defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>;
|
|
|
|
defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>;
|
|
|
|
|
2018-04-20 20:52:01 +08:00
|
|
|
// contiguous load with reg+reg addressing.
|
|
|
|
// Same 16 (mnemonic, vector element type) combinations and 4-bit values as
// the *_IMM records, built from sve_mem_cld_ss with one extra operand.
// That operand follows the mnemonic's memory element size, not the vector
// element size: *b -> GPR64NoXZRshifted8, *h -> 16, *w -> 32, *d -> 64.
// NOTE(review): the operand is presumably the offset register, scaled by the
// memory element size -- confirm.
defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1B_S : sve_mem_cld_ss<0b0010, "ld1b", Z_s, ZPR32, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1B_D : sve_mem_cld_ss<0b0011, "ld1b", Z_d, ZPR64, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1SW_D : sve_mem_cld_ss<0b0100, "ld1sw", Z_d, ZPR64, GPR64NoXZRshifted32>;
|
|
|
|
defm LD1H : sve_mem_cld_ss<0b0101, "ld1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
|
|
|
|
defm LD1H_S : sve_mem_cld_ss<0b0110, "ld1h", Z_s, ZPR32, GPR64NoXZRshifted16>;
|
|
|
|
defm LD1H_D : sve_mem_cld_ss<0b0111, "ld1h", Z_d, ZPR64, GPR64NoXZRshifted16>;
|
|
|
|
defm LD1SH_D : sve_mem_cld_ss<0b1000, "ld1sh", Z_d, ZPR64, GPR64NoXZRshifted16>;
|
|
|
|
defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>;
|
|
|
|
defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
|
|
|
|
defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
|
|
|
|
defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>;
|
|
|
|
defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
|
|
|
|
|
[AArch64][SVE] Asm: Support for contiguous, non-faulting LDNF1 (scalar+imm) load instructions
Reviewers: fhahn, rengolin, javed.absar, huntergr, SjoerdMeijer, t.p.northover, echristo, evandro
Reviewed By: rengolin
Subscribers: tschuett, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D45684
llvm-svn: 330583
2018-04-23 20:43:19 +08:00
|
|
|
// Non-faulting contiguous load with reg+immediate addressing.
// The 4-bit selectors and Z_*/ZPR* pairs mirror the reg+reg LD1* records; no
// index-register operand is passed to sve_mem_cldnf_si.
defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>;
defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>;
defm LDNF1B_S_IMM : sve_mem_cldnf_si<0b0010, "ldnf1b", Z_s, ZPR32>;
defm LDNF1B_D_IMM : sve_mem_cldnf_si<0b0011, "ldnf1b", Z_d, ZPR64>;
defm LDNF1SW_D_IMM : sve_mem_cldnf_si<0b0100, "ldnf1sw", Z_d, ZPR64>;
defm LDNF1H_IMM : sve_mem_cldnf_si<0b0101, "ldnf1h", Z_h, ZPR16>;
defm LDNF1H_S_IMM : sve_mem_cldnf_si<0b0110, "ldnf1h", Z_s, ZPR32>;
defm LDNF1H_D_IMM : sve_mem_cldnf_si<0b0111, "ldnf1h", Z_d, ZPR64>;
defm LDNF1SH_D_IMM : sve_mem_cldnf_si<0b1000, "ldnf1sh", Z_d, ZPR64>;
defm LDNF1SH_S_IMM : sve_mem_cldnf_si<0b1001, "ldnf1sh", Z_s, ZPR32>;
defm LDNF1W_IMM : sve_mem_cldnf_si<0b1010, "ldnf1w", Z_s, ZPR32>;
defm LDNF1W_D_IMM : sve_mem_cldnf_si<0b1011, "ldnf1w", Z_d, ZPR64>;
defm LDNF1SB_D_IMM : sve_mem_cldnf_si<0b1100, "ldnf1sb", Z_d, ZPR64>;
defm LDNF1SB_S_IMM : sve_mem_cldnf_si<0b1101, "ldnf1sb", Z_s, ZPR32>;
defm LDNF1SB_H_IMM : sve_mem_cldnf_si<0b1110, "ldnf1sb", Z_h, ZPR16>;
defm LDNF1D_IMM : sve_mem_cldnf_si<0b1111, "ldnf1d", Z_d, ZPR64>;
|
|
|
|
|
[AArch64][SVE] Asm: Support for contiguous, first-faulting LDFF1 (scalar+scalar) load instructions.
Reviewers: fhahn, rengolin, samparker, SjoerdMeijer, t.p.northover, echristo, evandro, javed.absar
Reviewed By: rengolin
Subscribers: tschuett, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D45946
llvm-svn: 330697
2018-04-24 16:59:08 +08:00
|
|
|
// First-faulting loads with reg+reg addressing.
// NOTE(review): these records pass GPR64shifted* index operands, whereas the
// LD1*/LDNT1* reg+reg records pass GPR64NoXZRshifted*; presumably XZR is an
// accepted index register here - confirm against the operand definitions.
defm LDFF1B : sve_mem_cldff_ss<0b0000, "ldff1b", Z_b, ZPR8, GPR64shifted8>;
defm LDFF1B_H : sve_mem_cldff_ss<0b0001, "ldff1b", Z_h, ZPR16, GPR64shifted8>;
defm LDFF1B_S : sve_mem_cldff_ss<0b0010, "ldff1b", Z_s, ZPR32, GPR64shifted8>;
defm LDFF1B_D : sve_mem_cldff_ss<0b0011, "ldff1b", Z_d, ZPR64, GPR64shifted8>;
defm LDFF1SW_D : sve_mem_cldff_ss<0b0100, "ldff1sw", Z_d, ZPR64, GPR64shifted32>;
defm LDFF1H : sve_mem_cldff_ss<0b0101, "ldff1h", Z_h, ZPR16, GPR64shifted16>;
defm LDFF1H_S : sve_mem_cldff_ss<0b0110, "ldff1h", Z_s, ZPR32, GPR64shifted16>;
defm LDFF1H_D : sve_mem_cldff_ss<0b0111, "ldff1h", Z_d, ZPR64, GPR64shifted16>;
defm LDFF1SH_D : sve_mem_cldff_ss<0b1000, "ldff1sh", Z_d, ZPR64, GPR64shifted16>;
defm LDFF1SH_S : sve_mem_cldff_ss<0b1001, "ldff1sh", Z_s, ZPR32, GPR64shifted16>;
defm LDFF1W : sve_mem_cldff_ss<0b1010, "ldff1w", Z_s, ZPR32, GPR64shifted32>;
defm LDFF1W_D : sve_mem_cldff_ss<0b1011, "ldff1w", Z_d, ZPR64, GPR64shifted32>;
defm LDFF1SB_D : sve_mem_cldff_ss<0b1100, "ldff1sb", Z_d, ZPR64, GPR64shifted8>;
defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>;
defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>;
defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>;
|
|
|
|
|
2018-04-16 15:09:29 +08:00
|
|
|
// LD(2|3|4) structured loads with reg+immediate
// The first literal tracks the element size (b/h/w/d), the second the number
// of registers (0b01 = 2, 0b10 = 3, 0b11 = 4); the immediate operand is
// simm4s2 / simm4s3 / simm4s4 to match the register count.
defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>;
defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>;
defm LD4B_IMM : sve_mem_eld_si<0b00, 0b11, ZZZZ_b, "ld4b", simm4s4>;
defm LD2H_IMM : sve_mem_eld_si<0b01, 0b01, ZZ_h, "ld2h", simm4s2>;
defm LD3H_IMM : sve_mem_eld_si<0b01, 0b10, ZZZ_h, "ld3h", simm4s3>;
defm LD4H_IMM : sve_mem_eld_si<0b01, 0b11, ZZZZ_h, "ld4h", simm4s4>;
defm LD2W_IMM : sve_mem_eld_si<0b10, 0b01, ZZ_s, "ld2w", simm4s2>;
defm LD3W_IMM : sve_mem_eld_si<0b10, 0b10, ZZZ_s, "ld3w", simm4s3>;
defm LD4W_IMM : sve_mem_eld_si<0b10, 0b11, ZZZZ_s, "ld4w", simm4s4>;
defm LD2D_IMM : sve_mem_eld_si<0b11, 0b01, ZZ_d, "ld2d", simm4s2>;
defm LD3D_IMM : sve_mem_eld_si<0b11, 0b10, ZZZ_d, "ld3d", simm4s3>;
defm LD4D_IMM : sve_mem_eld_si<0b11, 0b11, ZZZZ_d, "ld4d", simm4s4>;
|
2018-04-16 15:09:29 +08:00
|
|
|
|
2018-05-16 17:16:20 +08:00
|
|
|
// LD(2|3|4) structured loads (register + register)
// Same size / register-count literals as the reg+immediate records; the index
// operand suffix follows the element size (b -> 8, h -> 16, w -> 32, d -> 64).
def LD2B : sve_mem_eld_ss<0b00, 0b01, ZZ_b, "ld2b", GPR64NoXZRshifted8>;
def LD3B : sve_mem_eld_ss<0b00, 0b10, ZZZ_b, "ld3b", GPR64NoXZRshifted8>;
def LD4B : sve_mem_eld_ss<0b00, 0b11, ZZZZ_b, "ld4b", GPR64NoXZRshifted8>;
def LD2H : sve_mem_eld_ss<0b01, 0b01, ZZ_h, "ld2h", GPR64NoXZRshifted16>;
def LD3H : sve_mem_eld_ss<0b01, 0b10, ZZZ_h, "ld3h", GPR64NoXZRshifted16>;
def LD4H : sve_mem_eld_ss<0b01, 0b11, ZZZZ_h, "ld4h", GPR64NoXZRshifted16>;
def LD2W : sve_mem_eld_ss<0b10, 0b01, ZZ_s, "ld2w", GPR64NoXZRshifted32>;
def LD3W : sve_mem_eld_ss<0b10, 0b10, ZZZ_s, "ld3w", GPR64NoXZRshifted32>;
def LD4W : sve_mem_eld_ss<0b10, 0b11, ZZZZ_s, "ld4w", GPR64NoXZRshifted32>;
def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>;
def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>;
def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>;
|
|
|
|
|
2018-04-26 16:19:53 +08:00
|
|
|
// Gathers using unscaled 32-bit offsets, e.g.
//    ld1h z0.s, p0/z, [x0, z0.s, uxtw]
// NOTE(review): the byte forms pass the ZPR32Ext*8Only operands while the
// halfword/word forms pass ZPR32Ext*8. Byte gathers have no scaled
// counterpart among the scaled 32-bit-offset records, so this is presumably a
// diagnostics-only distinction - confirm against the operand definitions.
defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
|
|
|
|
|
|
|
|
// Gathers using scaled 32-bit offsets, e.g.
//    ld1h z0.s, p0/z, [x0, z0.s, uxtw #1]
// The operand suffix (16 / 32) follows the size letter of the mnemonic
// (h / w); there are no byte records in this group.
defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
|
|
|
|
|
2018-04-30 01:33:38 +08:00
|
|
|
// Gathers using scaled 32-bit pointers with offset, e.g.
//    ld1h z0.s, p0/z, [z0.s, #16]
// The immediate operand follows the size letter of the mnemonic:
// imm0_31 for b, uimm5s2 for h, uimm5s4 for w.
defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31>;
defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31>;
defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31>;
defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31>;
defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2>;
defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2>;
defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2>;
defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2>;
defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4>;
defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4>;
|
|
|
|
|
|
|
|
// Gathers using scaled 64-bit pointers with offset, e.g.
//    ld1h z0.d, p0/z, [z0.d, #16]
// The immediate operand follows the size letter of the mnemonic:
// imm0_31 for b, uimm5s2 for h, uimm5s4 for w, uimm5s8 for d.
defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31>;
defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31>;
defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31>;
defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31>;
defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2>;
defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2>;
defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2>;
defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2>;
defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4>;
defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4>;
defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4>;
defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4>;
defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8>;
defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8>;
|
|
|
|
|
2018-04-26 16:19:53 +08:00
|
|
|
// Gathers using unscaled 64-bit offsets, e.g.
//    ld1h z0.d, p0/z, [x0, z0.d]
// Only the 4-bit selector and the mnemonic are passed; the operand classes
// are chosen inside sve_mem_64b_gld_vs2_64_unscaled (not visible here).
defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb">;
defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb">;
defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b">;
defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b">;
defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh">;
defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh">;
defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h">;
defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h">;
defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw">;
defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw">;
defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w">;
defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w">;
defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d">;
defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d">;
|
|
|
|
|
|
|
|
// Gathers using scaled 64-bit offsets, e.g.
//    ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
// The ZPR64ExtLSL* suffix follows the size letter of the mnemonic
// (h -> 16, w -> 32, d -> 64); there are no byte records in this group.
defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", ZPR64ExtLSL16>;
defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", ZPR64ExtLSL16>;
defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", ZPR64ExtLSL16>;
defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", ZPR64ExtLSL16>;
defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", ZPR64ExtLSL32>;
defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", ZPR64ExtLSL32>;
defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", ZPR64ExtLSL32>;
defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", ZPR64ExtLSL32>;
defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", ZPR64ExtLSL64>;
defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", ZPR64ExtLSL64>;
|
|
|
|
|
|
|
|
// Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g.
//    ld1h z0.d, p0/z, [x0, z0.d, uxtw]
// NOTE(review): as with the packed 32-bit forms, the byte records pass the
// ZPR64Ext*8Only operands and all other records pass ZPR64Ext*8 - presumably
// a diagnostics-only distinction; confirm against the operand definitions.
defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
|
|
|
|
|
|
|
|
// Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g.
//    ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
// The ZPR64Ext*TW* suffix follows the size letter of the mnemonic
// (h -> 16, w -> 32, d -> 64).
defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
|
|
|
|
|
2018-05-02 19:48:49 +08:00
|
|
|
// Non-temporal contiguous loads (register + immediate)
// One record per element size; the 2-bit literal counts up with b/h/w/d.
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>;
defm LDNT1W_ZRI : sve_mem_cldnt_si<0b10, "ldnt1w", Z_s, ZPR32>;
defm LDNT1D_ZRI : sve_mem_cldnt_si<0b11, "ldnt1d", Z_d, ZPR64>;
|
|
|
|
|
|
|
|
// Non-temporal contiguous loads (register + register)
// Same literals as the register + immediate records, plus an index operand
// whose suffix follows the element size.
defm LDNT1B_ZRR : sve_mem_cldnt_ss<0b00, "ldnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
defm LDNT1H_ZRR : sve_mem_cldnt_ss<0b01, "ldnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
defm LDNT1W_ZRR : sve_mem_cldnt_ss<0b10, "ldnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm LDNT1D_ZRR : sve_mem_cldnt_ss<0b11, "ldnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
|
|
|
|
|
2018-04-26 16:19:53 +08:00
|
|
|
// Contiguous store with immediates.
// The first literal follows the size letter of the mnemonic (b/h/w/d) and the
// second follows the Z_*/ZPR* element size; every record has second >= first.
defm ST1B_IMM : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>;
defm ST1B_H_IMM : sve_mem_cst_si<0b00, 0b01, "st1b", Z_h, ZPR16>;
defm ST1B_S_IMM : sve_mem_cst_si<0b00, 0b10, "st1b", Z_s, ZPR32>;
defm ST1B_D_IMM : sve_mem_cst_si<0b00, 0b11, "st1b", Z_d, ZPR64>;
defm ST1H_IMM : sve_mem_cst_si<0b01, 0b01, "st1h", Z_h, ZPR16>;
defm ST1H_S_IMM : sve_mem_cst_si<0b01, 0b10, "st1h", Z_s, ZPR32>;
defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>;
defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>;
defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>;
defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>;
|
|
|
|
|
2018-05-01 21:36:03 +08:00
|
|
|
// Contiguous store with reg+reg addressing.
// The 4-bit literal is the two 2-bit fields of the immediate-form records
// concatenated (e.g. ST1H_S: 0b01, 0b10 -> 0b0110); the index operand suffix
// follows the size letter of the mnemonic.
defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
defm ST1B_H : sve_mem_cst_ss<0b0001, "st1b", Z_h, ZPR16, GPR64NoXZRshifted8>;
defm ST1B_S : sve_mem_cst_ss<0b0010, "st1b", Z_s, ZPR32, GPR64NoXZRshifted8>;
defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>;
defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>;
defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>;
defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
|
|
|
|
|
2018-05-02 21:00:30 +08:00
|
|
|
// Scatters using unscaled 32-bit offsets, e.g.
//    st1h z0.s, p0, [x0, z0.s, uxtw]
// and unpacked:
//    st1h z0.d, p0, [x0, z0.d, uxtw]
// Byte records pass the *8Only operand variants; all others pass *8.
defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
|
|
|
|
|
|
|
|
// Scatters using scaled 32-bit offsets, e.g.
//    st1h z0.s, p0, [x0, z0.s, uxtw #1]
// and unpacked:
//    st1h z0.d, p0, [x0, z0.d, uxtw #1]
// Same 3-bit literals as the unscaled records; the operand suffix follows the
// size letter of the mnemonic (h -> 16, w -> 32, d -> 64).
defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
|
|
|
|
|
|
|
|
// Scatters using 32/64-bit pointers with offset, e.g.
//    st1h z0.s, p0, [z0.s, #16]
//    st1h z0.d, p0, [z0.d, #16]
// The immediate operand follows the size letter of the mnemonic:
// imm0_31 for b, uimm5s2 for h, uimm5s4 for w, uimm5s8 for d.
defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>;
defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>;
defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>;
defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>;
defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>;
defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>;
defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>;
|
|
|
|
|
|
|
|
// Scatters using unscaled 64-bit offsets, e.g.
//    st1h z0.d, p0, [x0, z0.d]
// One record per memory element size; the 2-bit literal counts up with b/h/w/d.
defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">;
defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">;
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">;
|
|
|
|
|
|
|
|
// Scatters using scaled 64-bit offsets, e.g.
//    st1h z0.d, p0, [x0, z0.d, lsl #1]
// Same 2-bit literals as the unscaled records; no byte record in this group.
defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>;
defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>;
defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>;
|
|
|
|
|
2018-05-17 17:05:41 +08:00
|
|
|
// ST(2|3|4) structured stores (register + immediate)
// Literals and immediate operands mirror the LD(2|3|4) reg+immediate records:
// element size first, register count second, simm4s<count> as the immediate.
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;
defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>;
defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>;
defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>;
defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>;
defm ST2W_IMM : sve_mem_est_si<0b10, 0b01, ZZ_s, "st2w", simm4s2>;
defm ST3W_IMM : sve_mem_est_si<0b10, 0b10, ZZZ_s, "st3w", simm4s3>;
defm ST4W_IMM : sve_mem_est_si<0b10, 0b11, ZZZZ_s, "st4w", simm4s4>;
defm ST2D_IMM : sve_mem_est_si<0b11, 0b01, ZZ_d, "st2d", simm4s2>;
defm ST3D_IMM : sve_mem_est_si<0b11, 0b10, ZZZ_d, "st3d", simm4s3>;
defm ST4D_IMM : sve_mem_est_si<0b11, 0b11, ZZZZ_d, "st4d", simm4s4>;
|
[AArch64][SVE] Asm: Support for structured ST2, ST3 and ST4 (scalar+imm) store instructions.
Reviewers: fhahn, rengolin, javed.absar, SjoerdMeijer, t.p.northover, echristo, evandro, huntergr
Reviewed By: rengolin
Subscribers: tschuett, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D45681
llvm-svn: 330565
2018-04-23 15:50:35 +08:00
|
|
|
|
2018-05-17 17:05:41 +08:00
|
|
|
// ST(2|3|4) structured stores (register + register)
// Same literals as the register + immediate records; the index operand suffix
// follows the element size (b -> 8, h -> 16, w -> 32, d -> 64).
def ST2B : sve_mem_est_ss<0b00, 0b01, ZZ_b, "st2b", GPR64NoXZRshifted8>;
def ST3B : sve_mem_est_ss<0b00, 0b10, ZZZ_b, "st3b", GPR64NoXZRshifted8>;
def ST4B : sve_mem_est_ss<0b00, 0b11, ZZZZ_b, "st4b", GPR64NoXZRshifted8>;
def ST2H : sve_mem_est_ss<0b01, 0b01, ZZ_h, "st2h", GPR64NoXZRshifted16>;
def ST3H : sve_mem_est_ss<0b01, 0b10, ZZZ_h, "st3h", GPR64NoXZRshifted16>;
def ST4H : sve_mem_est_ss<0b01, 0b11, ZZZZ_h, "st4h", GPR64NoXZRshifted16>;
def ST2W : sve_mem_est_ss<0b10, 0b01, ZZ_s, "st2w", GPR64NoXZRshifted32>;
def ST3W : sve_mem_est_ss<0b10, 0b10, ZZZ_s, "st3w", GPR64NoXZRshifted32>;
def ST4W : sve_mem_est_ss<0b10, 0b11, ZZZZ_s, "st4w", GPR64NoXZRshifted32>;
def ST2D : sve_mem_est_ss<0b11, 0b01, ZZ_d, "st2d", GPR64NoXZRshifted64>;
def ST3D : sve_mem_est_ss<0b11, 0b10, ZZZ_d, "st3d", GPR64NoXZRshifted64>;
def ST4D : sve_mem_est_ss<0b11, 0b11, ZZZZ_d, "st4d", GPR64NoXZRshifted64>;
|
|
|
|
|
2018-05-02 19:48:49 +08:00
|
|
|
// Non-temporal contiguous stores (register + immediate)
// One record per element size; the 2-bit literal counts up with b/h/w/d.
defm STNT1B_ZRI : sve_mem_cstnt_si<0b00, "stnt1b", Z_b, ZPR8>;
defm STNT1H_ZRI : sve_mem_cstnt_si<0b01, "stnt1h", Z_h, ZPR16>;
defm STNT1W_ZRI : sve_mem_cstnt_si<0b10, "stnt1w", Z_s, ZPR32>;
defm STNT1D_ZRI : sve_mem_cstnt_si<0b11, "stnt1d", Z_d, ZPR64>;
|
|
|
|
|
|
|
|
// Non-temporal contiguous stores (register + register)
// Same literals as the register + immediate records, plus an index operand
// whose suffix follows the element size.
defm STNT1B_ZRR : sve_mem_cstnt_ss<0b00, "stnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
defm STNT1H_ZRR : sve_mem_cstnt_ss<0b01, "stnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
defm STNT1W_ZRR : sve_mem_cstnt_ss<0b10, "stnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm STNT1D_ZRR : sve_mem_cstnt_ss<0b11, "stnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
|
|
|
|
|
2018-05-02 21:32:39 +08:00
|
|
|
// Fill/Spill
// "ldr" records use the *_fill multiclasses and "str" records the *_spill
// ones; the _z_ / _p_ infix matches the Z / P letter in the record name.
defm LDR_ZXI : sve_mem_z_fill<"ldr">;
defm LDR_PXI : sve_mem_p_fill<"ldr">;
defm STR_ZXI : sve_mem_z_spill<"str">;
defm STR_PXI : sve_mem_p_spill<"str">;
|
|
|
|
|
2018-05-16 15:50:09 +08:00
|
|
|
// Contiguous prefetch (register + immediate)
// One record per element size; the 2-bit literal counts up with b/h/w/d.
defm PRFB_PRI : sve_mem_prfm_si<0b00, "prfb">;
defm PRFH_PRI : sve_mem_prfm_si<0b01, "prfh">;
defm PRFW_PRI : sve_mem_prfm_si<0b10, "prfw">;
defm PRFD_PRI : sve_mem_prfm_si<0b11, "prfd">;
|
|
|
|
|
|
|
|
// Contiguous prefetch (register + register)
// NOTE(review): the "prfw" record is named PRFS_PRR, unlike PRFW_PRI and the
// other PRFW_* records in this file. The name is left unchanged here because
// renaming it would change the generated record name that other code may
// reference; rename only together with all users.
def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>;
def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>;
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
|
|
|
|
|
2018-05-16 22:16:01 +08:00
|
|
|
// Gather prefetch using scaled 32-bit offsets, e.g.
//    prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
// The byte record passes the *8Only operands; the others pass operands whose
// suffix follows the size letter of the mnemonic (h -> 16, w -> 32, d -> 64).
defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>;
|
|
|
|
|
|
|
|
// Gather prefetch using unpacked, scaled 32-bit offsets, e.g.
//    prfh pldl1keep, p0, [x0, z0.d, uxtw #1]
// Same layout as the packed 32-bit records, with ZPR64Ext* operands.
defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
|
|
|
|
|
|
|
|
// Gather prefetch using scaled 64-bit offsets, e.g.
//    prfh pldl1keep, p0, [x0, z0.d, lsl #1]
// The ZPR64ExtLSL* suffix follows the size letter of the mnemonic
// (b -> 8, h -> 16, w -> 32, d -> 64).
defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>;
defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>;
defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>;
defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>;
|
|
|
|
|
|
|
|
// Gather prefetch using 32/64-bit pointers with offset, e.g.
//    prfh pldl1keep, p0, [z0.s, #16]
//    prfh pldl1keep, p0, [z0.d, #16]
// The immediate operand follows the size letter of the mnemonic:
// imm0_31 for b, uimm5s2 for h, uimm5s4 for w, uimm5s8 for d.
defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>;
defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>;
defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>;
defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>;

defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>;
defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>;
defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>;
defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>;
|
|
|
|
|
[AArch64][SVE] Asm: Support for ADR instruction.
Supporting various addressing modes:
- adr z0.s, [z0.s, z0.s]
- adr z0.s, [z0.s, z0.s, lsl #<shift>]
- adr z0.d, [z0.d, z0.d]
- adr z0.d, [z0.d, z0.d, lsl #<shift>]
- adr z0.d, [z0.d, z0.d, uxtw #<shift>]
- adr z0.d, [z0.d, z0.d, sxtw #<shift>]
Reviewers: rengolin, fhahn, SjoerdMeijer, samparker, javed.absar
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D48870
llvm-svn: 336533
2018-07-09 17:58:24 +08:00
|
|
|
// ADR with a vector base and a vector offset that is sign-extended (sxtw),
// zero-extended (uxtw) or shifted (lsl), e.g.
//    adr z0.d, [z0.d, z0.d, sxtw #<shift>]
//    adr z0.s, [z0.s, z0.s, lsl #<shift>]
defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">;
defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">;
defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">;
defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">;
|
|
|
|
|
2018-07-09 20:32:56 +08:00
|
|
|
// Permute records with the _ZZZ suffix (presumably three Z-register operands
// - confirm against the sve_int_perm_* multiclasses): tbl, then the
// zip/uzp/trn pairs, whose 3-bit literals count up from 0b000 to 0b101.
defm TBL_ZZZ : sve_int_perm_tbl<"tbl">;

defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">;
defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">;
defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1">;
defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2">;
defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1">;
defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2">;
|
2017-12-20 19:02:42 +08:00
|
|
|
|
|
|
|
// zip/uzp/trn records with the _PPP suffix (presumably the predicate-register
// forms - confirm against sve_int_perm_bin_perm_pp); same mnemonics and 3-bit
// literals as the _ZZZ records.
defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">;
defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">;
defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1">;
defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2">;
defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">;
defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">;
|
2018-01-10 19:32:47 +08:00
|
|
|
|
[AArch64][SVE] Asm: Support for vector element compares.
This patch adds instructions for comparing elements from two vectors, e.g.
cmpgt p0.s, p0/z, z0.s, z1.s
and also adds support for comparing to a 64-bit wide element vector, e.g.
cmpgt p0.s, p0/z, z0.s, z1.d
The patch also contains aliases for certain comparisons, e.g.:
cmple p0.s, p0/z, z0.s, z1.s => cmpge p0.s, p0/z, z1.s, z0.s
cmplo p0.s, p0/z, z0.s, z1.s => cmphi p0.s, p0/z, z1.s, z0.s
cmpls p0.s, p0/z, z0.s, z1.s => cmphs p0.s, p0/z, z1.s, z0.s
cmplt p0.s, p0/z, z0.s, z1.s => cmpgt p0.s, p0/z, z1.s, z0.s
llvm-svn: 334931
2018-06-18 18:59:19 +08:00
|
|
|
// Integer compares of elements from two vectors, e.g.
//    cmpgt p0.s, p0/z, z0.s, z1.s
// Only hs/hi/ge/gt/eq/ne have records here; the literals 0b010 and 0b011 are
// not used in this group.
defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs">;
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi">;
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge">;
defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt">;
defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq">;
defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne">;
|
|
|
|
|
|
|
|
// Integer compares against a 64-bit wide element vector, e.g.
//    cmpgt p0.s, p0/z, z0.s, z1.d
// eq/ne use sve_int_cmp_0_wide; the ordered compares use sve_int_cmp_1_wide
// and, unlike the same-width group, have explicit lt/le/lo/ls records.
defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq">;
defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne">;
defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge">;
defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt">;
defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt">;
defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple">;
defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs">;
defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi">;
defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo">;
defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls">;
|
|
|
|
|
2018-07-02 16:20:59 +08:00
|
|
|
// Integer compares with the _PPzZI suffix (presumably vector-with-immediate
// forms - confirm against the sve_int_*cmp_vi multiclasses). ge/gt/lt/le/eq/ne
// use sve_int_scmp_vi with a 3-bit literal; hs/hi/lo/ls use sve_int_ucmp_vi
// with a 2-bit literal.
defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge">;
defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt">;
defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt">;
defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple">;
defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq">;
defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne">;
defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs">;
defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi">;
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for vector element FP compare.
Contains the following variants:
- Compare with (elements from) other vector
instructions: fcmeq, fcmgt, fcmge, fcmne, fcmuo.
aliases: fcmle, fcmlt.
e.g. fcmle p0.h, p0/z, z0.h, z1.h => fcmge p0.h, p0/z, z1.h, z0.h
- Compare absolute values with (absolute values from) other vector.
instructions: facge, facgt.
aliases: facle, faclt.
e.g. facle p0.h, p0/z, z0.h, z1.h => facge p0.h, p0/z, z1.h, z0.h
- Compare vector elements with #0.0
instructions: fcmeq, fcmgt, fcmge, fcmle, fcmlt, fcmne.
e.g. fcmle p0.h, p0/z, z0.h, #0.0
llvm-svn: 336182
2018-07-03 17:07:23 +08:00
|
|
|
// Floating-point compares of elements from two vectors (fcm*) and of their
// absolute values (fac*), e.g.
//    fcmge p0.h, p0/z, z1.h, z0.h
// The literal 0b110 is not used in this group (facgt is 0b111).
defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge">;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt">;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq">;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne">;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo">;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge">;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt">;
|
|
|
|
|
|
|
|
// Floating-point compares of vector elements with #0.0, e.g.
//    fcmle p0.h, p0/z, z0.h, #0.0
// The literal 0b101 is not used in this group (fcmne is 0b110).
defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">;
defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">;
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">;
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">;
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
|
|
|
|
|
2018-07-29 16:51:08 +08:00
|
|
|
// while* records with the _PWW suffix, built from sve_int_while4_rr
// (presumably the forms taking 32-bit scalar operands - confirm against the
// multiclass). The 3-bit literals match the _PXX records.
defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
|
|
|
|
|
|
|
|
// while* records with the _PXX suffix, built from sve_int_while8_rr
// (presumably the forms taking 64-bit scalar operands - confirm against the
// multiclass). The 3-bit literals match the _PWW records.
defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
|
|
|
|
|
2018-07-29 16:00:16 +08:00
|
|
|
// 'ctermeq' instantiated with the GPR32 register class; CTERMEQ_XX is the
// GPR64 counterpart and differs in the first template argument (0b1).
def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
|
|
|
|
def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
|
|
|
|
def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>;
|
|
|
|
def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>;
|
|
|
|
|
2018-07-09 23:22:08 +08:00
|
|
|
def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">;
|
|
|
|
def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">;
|
|
|
|
def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">;
|
|
|
|
|
|
|
|
defm CNTB_XPiI : sve_int_count<0b000, "cntb">;
|
|
|
|
defm CNTH_XPiI : sve_int_count<0b010, "cnth">;
|
|
|
|
defm CNTW_XPiI : sve_int_count<0b100, "cntw">;
|
|
|
|
defm CNTD_XPiI : sve_int_count<0b110, "cntd">;
|
|
|
|
defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp">;
|
|
|
|
|
|
|
|
defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
|
|
|
|
defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
|
|
|
|
defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
|
|
|
|
defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
|
|
|
|
defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
|
|
|
|
defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
|
|
|
|
defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
|
|
|
|
defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
|
|
|
|
|
2018-07-02 15:34:52 +08:00
|
|
|
defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb">;
|
|
|
|
defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb">;
|
|
|
|
defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb">;
|
|
|
|
defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb">;
|
2018-06-18 22:47:52 +08:00
|
|
|
defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb">;
|
|
|
|
defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb">;
|
|
|
|
defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb">;
|
|
|
|
defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb">;
|
|
|
|
|
2018-07-02 15:34:52 +08:00
|
|
|
defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch">;
|
|
|
|
defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch">;
|
|
|
|
defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech">;
|
|
|
|
defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech">;
|
2018-06-18 22:47:52 +08:00
|
|
|
defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch">;
|
|
|
|
defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch">;
|
|
|
|
defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech">;
|
|
|
|
defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech">;
|
|
|
|
|
2018-07-02 15:34:52 +08:00
|
|
|
defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw">;
|
|
|
|
defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw">;
|
|
|
|
defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw">;
|
|
|
|
defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw">;
|
2018-06-18 22:47:52 +08:00
|
|
|
defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw">;
|
|
|
|
defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw">;
|
|
|
|
defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw">;
|
|
|
|
defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw">;
|
|
|
|
|
2018-07-02 15:34:52 +08:00
|
|
|
defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd">;
|
|
|
|
defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd">;
|
|
|
|
defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd">;
|
|
|
|
defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd">;
|
2018-06-18 22:47:52 +08:00
|
|
|
defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd">;
|
|
|
|
defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd">;
|
|
|
|
defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd">;
|
|
|
|
defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for (saturating) vector INC/DEC instructions.
Increment/decrement vector by multiple of predicate constraint
element count.
The variants added by this patch are:
- INCH, INCW, INCD
and (saturating):
- SQINCH, SQINCW, SQINCD
- UQINCH, UQINCW, UQINCD
- SQDECH, SQDECW, SQDECD
- UQDECH, UQDECW, UQDECD
For example:
incw z0.s, all, mul #4
llvm-svn: 336090
2018-07-02 17:31:11 +08:00
|
|
|
// Saturating vector increment 'sqinch' on the ZPR16 register operand class:
// increments the vector by a multiple of the predicate-constraint element count.
defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16>;
|
|
|
|
defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16>;
|
|
|
|
defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16>;
|
|
|
|
defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16>;
|
|
|
|
defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>;
|
|
|
|
defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>;
|
|
|
|
defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32>;
|
|
|
|
defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32>;
|
|
|
|
defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32>;
|
|
|
|
defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32>;
|
|
|
|
// Non-saturating vector increment by a multiple of the predicate-constraint
// element count, e.g. 'incw z0.s, all, mul #4'.
defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>;
|
|
|
|
defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>;
|
|
|
|
defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64>;
|
|
|
|
defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64>;
|
|
|
|
defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64>;
|
|
|
|
defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64>;
|
|
|
|
defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>;
|
|
|
|
defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>;
|
|
|
|
|
[AArch64][SVE] Asm: Support for (SQ)INCP/DECP (scalar, vector)
Increments/decrements the result with the number of active bits
from the predicate.
The inc/dec variants added are:
- incp x0, p0.h (scalar)
- incp z0.h, p0 (vector)
The unsigned saturating inc/dec variants added are:
- uqincp x0, p0.h (scalar)
- uqincp w0, p0.h (scalar, 32bit)
- uqincp z0.h, p0 (vector)
The signed saturating inc/dec variants added are:
- sqincp x0, p0.h (scalar)
- sqincp x0, p0.h, w0 (scalar, 32bit)
- sqincp z0.h, p0 (vector)
llvm-svn: 336091
2018-07-02 18:08:36 +08:00
|
|
|
// Signed saturating increment of a scalar by the number of active predicate
// bits, 32-bit scalar form: 'sqincp x0, p0.h, w0'.
defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp">;
|
|
|
|
defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp">;
|
|
|
|
// Unsigned saturating increment of a scalar by the number of active predicate
// bits, 32-bit scalar form: 'uqincp w0, p0.h'.
defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp">;
|
|
|
|
defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp">;
|
|
|
|
defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp">;
|
|
|
|
defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp">;
|
|
|
|
defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp">;
|
|
|
|
defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp">;
|
|
|
|
// Non-saturating increment of a scalar by the number of active predicate
// bits: 'incp x0, p0.h'.
defm INCP_XP : sve_int_count_r_x64<0b10000, "incp">;
|
|
|
|
defm DECP_XP : sve_int_count_r_x64<0b10100, "decp">;
|
|
|
|
|
|
|
|
// Vector form of the signed saturating increment by the number of active
// predicate bits: 'sqincp z0.h, p0'.
defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp">;
|
|
|
|
defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp">;
|
|
|
|
defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp">;
|
|
|
|
defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp">;
|
|
|
|
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
|
|
|
|
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
|
|
|
|
|
[AArch64][SVE] Asm: Add support for SVE INDEX instructions.
Reviewers: rengolin, fhahn, javed.absar, SjoerdMeijer, huntergr, t.p.northover, echristo, evandro
Reviewed By: rengolin, fhahn
Subscribers: tschuett, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D45370
llvm-svn: 329674
2018-04-10 15:01:53 +08:00
|
|
|
// SVE 'index' instruction, _RR form.
// NOTE(review): the RR/IR/RI/II suffixes presumably encode whether each of the
// two operands is a register (R) or an immediate (I) — confirm against the
// sve_int_index_* multiclasses.
defm INDEX_RR : sve_int_index_rr<"index">;
|
|
|
|
defm INDEX_IR : sve_int_index_ir<"index">;
|
|
|
|
defm INDEX_RI : sve_int_index_ri<"index">;
|
|
|
|
defm INDEX_II : sve_int_index_ii<"index">;
|
[AArch64][SVE] Asm: Add support for unpredicated LSL/LSR (shift by immediate) instructions.
Reviewers: rengolin, fhahn, javed.absar, SjoerdMeijer, huntergr, t.p.northover, echristo, evandro
Reviewed By: rengolin, fhahn
Subscribers: tschuett, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D45371
llvm-svn: 329681
2018-04-10 18:03:13 +08:00
|
|
|
|
[AArch64][SVE] Asm: Support for remaining shift instructions.
This patch completes support for shifts, which include:
- LSL - Logical Shift Left
- LSLR - Logical Shift Left, Reversed form
- LSR - Logical Shift Right
- LSRR - Logical Shift Right, Reversed form
- ASR - Arithmetic Shift Right
- ASRR - Arithmetic Shift Right, Reversed form
- ASRD - Arithmetic Shift Right for Divide
In the following variants:
- Predicated shift by immediate - ASR, LSL, LSR, ASRD
e.g.
asr z0.h, p0/m, z0.h, #1
(active lanes of z0 shifted by #1)
- Unpredicated shift by immediate - ASR, LSL*, LSR*
e.g.
asr z0.h, z1.h, #1
(all lanes of z1 shifted by #1, stored in z0)
- Predicated shift by vector - ASR, LSL*, LSR*
e.g.
asr z0.h, p0/m, z0.h, z1.h
(active lanes of z0 shifted by z1, stored in z0)
- Predicated shift by vector, reversed form - ASRR, LSLR, LSRR
e.g.
lslr z0.h, p0/m, z0.h, z1.h
(active lanes of z1 shifted by z0, stored in z0)
- Predicated shift left/right by wide vector - ASR, LSL, LSR
e.g.
lsl z0.h, p0/m, z0.h, z1.d
(active lanes of z0 shifted by wide elements of vector z1)
- Unpredicated shift left/right by wide vector - ASR, LSL, LSR
e.g.
lsl z0.h, z1.h, z2.d
(all lanes of z1 shifted by wide elements of z2, stored in z0)
*Variants added in previous patches.
llvm-svn: 336547
2018-07-09 21:23:41 +08:00
|
|
|
// Unpredicated shifts
|
|
|
|
// Unpredicated arithmetic shift right by immediate, e.g. 'asr z0.h, z1.h, #1'
// (all lanes of z1 shifted by #1, stored in z0).
defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">;
|
|
|
|
defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr">;
|
|
|
|
defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl">;
|
|
|
|
|
|
|
|
defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
|
|
|
|
defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
|
|
|
|
// Unpredicated logical shift left by wide vector, e.g. 'lsl z0.h, z1.h, z2.d'
// (all lanes of z1 shifted by wide elements of z2, stored in z0).
defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
|
|
|
|
|
|
|
|
// Predicated shifts
|
[AArch64][SVE2] Asm: add various bitwise shift instructions
Summary:
This patch adds support for the SVE2 saturating/rounding bitwise shift
left (predicated) group of instructions:
* SRSHL, URSHL, SRSHLR, URSHLR, SQSHL, UQSHL, SQRSHL, UQRSHL,
SQSHLR, UQSHLR, SQRSHLR, UQRSHLR
Immediate forms of the SQSHL and UQSHL instructions are also added to
the existing SVE bitwise shift by immediate (predicated) group, as well
as three new instructions SRSHR/URSHR/SQSHLU. The new instructions in
this group are encoded similarly and are implemented using the same
TableGen class with a minimal change (1 bit in encoding).
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62140
llvm-svn: 361612
2019-05-24 17:17:23 +08:00
|
|
|
// Predicated arithmetic shift right by immediate, e.g. 'asr z0.h, p0/m, z0.h, #1'
// (active lanes of z0 shifted by #1).
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
|
|
|
|
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
|
|
|
|
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
|
|
|
|
// ASRD: Arithmetic Shift Right for Divide, predicated shift-by-immediate form.
// It reuses the right-shift immediate multiclass shared with ASR/LSR.
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
|
[AArch64][SVE] Asm: Support for remaining shift instructions.
This patch completes support for shifts, which include:
- LSL - Logical Shift Left
- LSLR - Logical Shift Left, Reversed form
- LSR - Logical Shift Right
- LSRR - Logical Shift Right, Reversed form
- ASR - Arithmetic Shift Right
- ASRR - Arithmetic Shift Right, Reversed form
- ASRD - Arithmetic Shift Right for Divide
In the following variants:
- Predicated shift by immediate - ASR, LSL, LSR, ASRD
e.g.
asr z0.h, p0/m, z0.h, #1
(active lanes of z0 shifted by #1)
- Unpredicated shift by immediate - ASR, LSL*, LSR*
e.g.
asr z0.h, z1.h, #1
(all lanes of z1 shifted by #1, stored in z0)
- Predicated shift by vector - ASR, LSL*, LSR*
e.g.
asr z0.h, p0/m, z0.h, z1.h
(active lanes of z0 shifted by z1, stored in z0)
- Predicated shift by vector, reversed form - ASRR, LSLR, LSRR
e.g.
lslr z0.h, p0/m, z0.h, z1.h
(active lanes of z1 shifted by z0, stored in z0)
- Predicated shift left/right by wide vector - ASR, LSL, LSR
e.g.
lsl z0.h, p0/m, z0.h, z1.d
(active lanes of z0 shifted by wide elements of vector z1)
- Unpredicated shift left/right by wide vector - ASR, LSL, LSR
e.g.
lsl z0.h, z1.h, z2.d
(all lanes of z1 shifted by wide elements of z2, stored in z0)
*Variants added in previous patches.
llvm-svn: 336547
2018-07-09 21:23:41 +08:00
|
|
|
|
|
|
|
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
|
|
|
|
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
|
|
|
|
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">;
|
|
|
|
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">;
|
|
|
|
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">;
|
|
|
|
// Predicated logical shift left by vector, reversed form, e.g.
// 'lslr z0.h, p0/m, z0.h, z1.h' (active lanes of z1 shifted by z0, stored in z0).
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">;
|
|
|
|
|
|
|
|
defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">;
|
|
|
|
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
|
|
|
|
// Predicated logical shift left by wide vector, e.g. 'lsl z0.h, p0/m, z0.h, z1.d'
// (active lanes of z0 shifted by wide elements of vector z1).
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
|
[AArch64][SVE] Asm: Support for AND, ORR, EOR and BIC instructions.
This patch addresses the following variants:
- bitmask immediate, e.g. 'and z0.d, z0.d, #0x6'.
- unpredicated data vectors, e.g. 'and z0.d, z1.d, z2.d'.
- predicated data vectors, e.g. 'and z0.d, p0/m, z0.d, z1.d'.
And also several aliases, such as:
- ORN, alias of ORR.
- EON, alias of EOR.
- BIC, alias of AND (immediate variant)
- MOV, alias of ORR (if unpredicated and source register operands are the same)
Reviewers: rengolin, huntergr, fhahn, samparker, SjoerdMeijer, javed.absar
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D47363
llvm-svn: 333414
2018-05-29 21:08:43 +08:00
|
|
|
|
2018-07-31 00:05:45 +08:00
|
|
|
// FP precision conversion, single- to half-precision: 'fcvt z0.h, p0/m, z0.s'.
// NOTE(review): the ZPR32, ZPR16 arguments are presumably the source and
// destination operand classes in that order — confirm against sve_fp_2op_p_zd.
def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
|
|
|
|
def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
|
|
|
|
def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>;
|
|
|
|
def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>;
|
|
|
|
def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>;
|
|
|
|
def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>;
|
|
|
|
def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
|
|
|
|
def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
|
|
|
|
def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
|
|
|
|
def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
|
|
|
|
def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>;
|
|
|
|
def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>;
|
|
|
|
def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>;
|
|
|
|
def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>;
|
|
|
|
def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>;
|
|
|
|
def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>;
|
|
|
|
def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>;
|
|
|
|
def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>;
|
|
|
|
// Signed int -> FP conversion, 32-bit int to half-precision FP:
// 'scvtf z0.h, p0/m, z0.s'.
def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>;
|
|
|
|
def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>;
|
|
|
|
def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>;
|
|
|
|
def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>;
|
|
|
|
def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>;
|
|
|
|
def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>;
|
|
|
|
def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
|
|
|
|
def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
|
|
|
|
def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
|
|
|
|
// FP -> signed int conversion (round to zero), half-precision FP to 32-bit int:
// 'fcvtzs z0.s, p0/m, z0.h'.
def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
|
|
|
|
def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
|
|
|
|
def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
|
|
|
|
def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
|
|
|
|
def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
|
|
|
|
def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
|
|
|
|
def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
|
[AArch64][SVE] Asm: Support for FP conversion instructions.
The variants added are:
- fcvt (FP convert precision)
- scvtf (signed int -> FP)
- ucvtf (unsigned int -> FP)
- fcvtzs (FP -> signed int (round to zero))
- fcvtzu (FP -> unsigned int (round to zero))
For example:
fcvt z0.h, p0/m, z0.s (single- to half-precision FP)
scvtf z0.h, p0/m, z0.s (32-bit int to half-precision FP)
ucvtf z0.h, p0/m, z0.s (32-bit unsigned int to half-precision FP)
fcvtzs z0.s, p0/m, z0.h (half-precision FP to 32-bit int)
fcvtzu z0.s, p0/m, z0.h (half-precision FP to 32-bit unsigned int)
llvm-svn: 336265
2018-07-04 20:13:17 +08:00
|
|
|
|
2018-07-06 04:21:21 +08:00
|
|
|
// 'frintn', first of the records built from sve_fp_2op_p_zd_HSD.
// NOTE(review): the _HSD suffix presumably means the multiclass expands to
// half/single/double element-size variants — confirm in the multiclass.
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
|
|
|
|
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
|
|
|
|
defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">;
|
|
|
|
defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">;
|
|
|
|
defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">;
|
|
|
|
defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">;
|
|
|
|
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">;
|
|
|
|
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">;
|
|
|
|
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">;
|
|
|
|
|
[AArch64][SVE] Asm: Support for AND, ORR, EOR and BIC instructions.
This patch addresses the following variants:
- bitmask immediate, e.g. 'and z0.d, z0.d, #0x6'.
- unpredicated data vectors, e.g. 'and z0.d, z1.d, z2.d'.
- predicated data vectors, e.g. 'and z0.d, p0/m, z0.d, z1.d'.
And also several aliases, such as:
- ORN, alias of ORR.
- EON, alias of EOR.
- BIC, alias of AND (immediate variant)
- MOV, alias of ORR (if unpredicated and source register operands are the same)
Reviewers: rengolin, huntergr, fhahn, samparker, SjoerdMeijer, javed.absar
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D47363
llvm-svn: 333414
2018-05-29 21:08:43 +08:00
|
|
|
// InstAliases
|
|
|
|
// Alias: 'mov $Zd, $Zn' is ORR_ZZZ with both source operands set to $Zn
// (MOV is an alias of unpredicated ORR when the source registers are the same).
// The trailing 1 is InstAlias's Emit argument.
def : InstAlias<"mov $Zd, $Zn",
|
|
|
|
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
|
2018-06-17 18:11:04 +08:00
|
|
|
// Alias: the '/m' predicate move 'mov $Pd, $Pg/m, $Pn' maps to SEL_PPPP with
// the destination $Pd reused as the final source operand.
def : InstAlias<"mov $Pd, $Pg/m, $Pn",
|
|
|
|
(SEL_PPPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pd), 1>;
|
[AArch64][SVE] Asm: Support for bitwise operations on predicate vectors.
This patch adds support for instructions performing bitwise operations
on predicate vectors, including AND, BIC, EOR, NAND, NOR, ORN, ORR, and
their status flag setting variants ANDS, BICS, EORS, NANDS, ORNS, ORRS.
This patch also adds several aliases:
orr p0.b, p1/z, p1.b, p1.b => mov p0.b, p1.b
orrs p0.b, p1/z, p1.b, p1.b => movs p0.b, p1.b
and p0.b, p1/z, p2.b, p2.b => mov p0.b, p1/z, p2.b
ands p0.b, p1/z, p2.b, p2.b => movs p0.b, p1/z, p2.b
eor p0.b, p1/z, p2.b, p1.b => not p0.b, p1/z, p2.b
eors p0.b, p1/z, p2.b, p1.b => nots p0.b, p1/z, p2.b
llvm-svn: 334906
2018-06-17 18:48:21 +08:00
|
|
|
// Alias: 'mov $Pd, $Pn' maps to ORR_PPzPP with $Pn used as the governing
// predicate and as both sources (orr p0.b, p1/z, p1.b, p1.b => mov p0.b, p1.b).
def : InstAlias<"mov $Pd, $Pn",
|
|
|
|
(ORR_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>;
|
|
|
|
// Alias: 'mov $Pd, $Pg/z, $Pn' maps to AND_PPzPP with $Pn as both sources
// (and p0.b, p1/z, p2.b, p2.b => mov p0.b, p1/z, p2.b).
def : InstAlias<"mov $Pd, $Pg/z, $Pn",
|
|
|
|
(AND_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>;
|
|
|
|
|
|
|
|
def : InstAlias<"movs $Pd, $Pn",
|
|
|
|
(ORRS_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>;
|
|
|
|
def : InstAlias<"movs $Pd, $Pg/z, $Pn",
|
|
|
|
(ANDS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>;
|
|
|
|
|
|
|
|
// Alias: 'not $Pd, $Pg/z, $Pn' maps to EOR_PPzPP with the governing predicate
// $Pg repeated as the last source (eor p0.b, p1/z, p2.b, p1.b => not p0.b, p1/z, p2.b).
def : InstAlias<"not $Pd, $Pg/z, $Pn",
|
|
|
|
(EOR_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>;
|
|
|
|
|
|
|
|
def : InstAlias<"nots $Pd, $Pg/z, $Pn",
|
|
|
|
(EORS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>;
|
[AArch64][SVE] Asm: Support for vector element compares.
This patch adds instructions for comparing elements from two vectors, e.g.
cmpgt p0.s, p0/z, z0.s, z1.s
and also adds support for comparing to a 64-bit wide element vector, e.g.
cmpgt p0.s, p0/z, z0.s, z1.d
The patch also contains aliases for certain comparisons, e.g.:
cmple p0.s, p0/z, z0.s, z1.s => cmpge p0.s, p0/z, z1.s, z0.s
cmplo p0.s, p0/z, z0.s, z1.s => cmphi p0.s, p0/z, z1.s, z0.s
cmpls p0.s, p0/z, z0.s, z1.s => cmphs p0.s, p0/z, z1.s, z0.s
cmplt p0.s, p0/z, z0.s, z1.s => cmpgt p0.s, p0/z, z1.s, z0.s
llvm-svn: 334931
2018-06-18 18:59:19 +08:00
|
|
|
|
|
|
|
// Alias: 'cmple' is CMPGE with the two source vectors swapped — the asm string
// lists $Zm, $Zn while the result dag passes $Zn, $Zm
// (cmple p0.s, p0/z, z0.s, z1.s => cmpge p0.s, p0/z, z1.s, z0.s).
// Emit argument is 0 for this alias.
def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGE_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
|
|
|
|
|
|
|
// Alias: 'cmplo' is CMPHI with the two source vectors swapped
// (cmplo p0.s, p0/z, z0.s, z1.s => cmphi p0.s, p0/z, z1.s, z0.s).
def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHI_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHI_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHI_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHI_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
|
|
|
|
|
|
|
def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHS_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHS_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHS_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPHS_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
|
|
|
|
|
|
|
def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGT_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(CMPGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
[AArch64][SVE] Asm: Support for vector element FP compare.
Contains the following variants:
- Compare with (elements from) other vector
instructions: fcmeq, fcmgt, fcmge, fcmne, fcmuo.
aliases: fcmle, fcmlt.
e.g. fcmle p0.h, p0/z, z0.h, z1.h => fcmge p0.h, p0/z, z1.h, z0.h
- Compare absolute values with (absolute values from) other vector.
instructions: facge, facgt.
aliases: facle, faclt.
e.g. facle p0.h, p0/z, z0.h, z1.h => facge p0.h, p0/z, z1.h, z0.h
- Compare vector elements with #0.0
instructions: fcmeq, fcmgt, fcmge, fcmle, fcmlt, fcmne.
e.g. fcmle p0.h, p0/z, z0.h, #0.0
llvm-svn: 336182
2018-07-03 17:07:23 +08:00
|
|
|
|
|
|
|
// Alias: 'facle' is FACGE with the two source vectors swapped
// (facle p0.h, p0/z, z0.h, z1.h => facge p0.h, p0/z, z1.h, z0.h).
def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FACGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FACGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FACGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
|
|
|
|
|
|
|
def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FACGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FACGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FACGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
|
|
|
|
|
|
|
// Alias: vector-vector 'fcmle' is FCMGE with the two source vectors swapped
// (fcmle p0.h, p0/z, z0.h, z1.h => fcmge p0.h, p0/z, z1.h, z0.h).
def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FCMGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FCMGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FCMGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
|
|
|
|
|
|
|
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FCMGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
|
|
|
|
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
|
|
|
|
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
|
|
|
|
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
[SVE][Inline-Asm] Support for SVE asm operands
Summary:
Adds the following inline asm constraints for SVE:
- w: SVE vector register with full range, Z0 to Z31
- x: Restricted to registers Z0 to Z15 inclusive.
- y: Restricted to registers Z0 to Z7 inclusive.
This change also adds the "z" modifier to interpret a register as an SVE register.
Not all of the bitconvert patterns added by this patch are used, but they have been included here for completeness.
Reviewers: t.p.northover, sdesmalen, rovka, momchil.velikov, rengolin, cameron.mcinally, greened
Reviewed By: sdesmalen
Subscribers: javed.absar, tschuett, rkruppe, psnobl, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66302
llvm-svn: 370673
2019-09-03 00:12:31 +08:00
|
|
|
|
|
|
|
// Bitconvert between SVE vector types selects to the same ZPR register with a
// new value type; the result dag contains no instruction. Not all of these
// bitconvert patterns are used, but they are included for completeness.
def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
|
|
|
|
|
|
|
|
def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
|
|
|
|
|
|
|
|
def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
|
|
|
|
|
|
|
|
def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>;
|
|
|
|
|
|
|
|
def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
|
|
|
|
|
|
|
|
def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
|
|
|
|
|
|
|
|
def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
|
|
|
|
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
|
|
|
|
|
2017-11-08 00:58:13 +08:00
|
|
|
}
|
2019-05-14 23:01:00 +08:00
|
|
|
|
|
|
|
let Predicates = [HasSVE2] in {
|
|
|
|
// SVE2 integer multiply-add (indexed)
|
|
|
|
defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;
|
|
|
|
defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;
|
2019-05-14 23:10:16 +08:00
|
|
|
|
|
|
|
// SVE2 saturating multiply-add high (indexed)
|
|
|
|
defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;
|
|
|
|
defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;
|
|
|
|
|
|
|
|
// SVE2 saturating multiply-add high (vectors, unpredicated)
|
|
|
|
defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah">;
|
|
|
|
defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh">;
|
2019-05-16 17:07:26 +08:00
|
|
|
|
|
|
|
// SVE2 integer multiply (indexed)
|
|
|
|
defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul">;
|
|
|
|
|
|
|
|
// SVE2 saturating multiply high (indexed)
|
|
|
|
defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh">;
|
|
|
|
defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
|
|
|
|
|
|
|
|
// SVE2 signed saturating doubling multiply high (unpredicated)
|
|
|
|
defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh">;
|
|
|
|
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh">;
|
|
|
|
|
|
|
|
// SVE2 integer multiply vectors (unpredicated)
|
|
|
|
defm MUL_ZZZ : sve2_int_mul<0b000, "mul">;
|
|
|
|
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
|
|
|
|
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
|
|
|
|
// 'pmul' is declared as a single record (def, explicit 0b00 and ZPR8
// arguments) rather than through the two-argument defm form used by the
// MUL/SMULH/UMULH records in this group.
def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
|
[AArch64][SVE2] Asm: implement CDOT instruction
Summary:
The complex DOT instructions perform a dot-product on quadtuplets from
two source vectors and the resulting wide real or wide imaginary is
accumulated into the destination register. The instructions come in two
forms:
Vector form, e.g.
cdot z0.s, z1.b, z2.b, #90 - complex dot product on four 8-bit quad-tuplets,
accumulating results in 32-bit elements. The
complex numbers in the second source vector are
rotated by 90 degrees.
cdot z0.d, z1.h, z2.h, #180 - complex dot product on four 16-bit quad-tuplets,
accumulating results in 64-bit elements.
The complex numbers in the second source
vector are rotated by 180 degrees.
Indexed form, e.g.
cdot z0.s, z1.b, z2.b[3], #0 - complex dot product on four 8-bit quad-tuplets,
with specified quadtuplet from second source vector,
accumulating results in 32-bit elements.
cdot z0.d, z1.h, z2.h[1], #0 - complex dot product on four 16-bit quad-tuplets,
with specified quadtuplet from second source vector,
accumulating results in 64-bit elements.
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer, rovka
Differential Revision: https://reviews.llvm.org/D61903
llvm-svn: 360870
2019-05-16 17:33:44 +08:00
|
|
|
|
|
|
|
// SVE2 complex integer dot product (indexed)
|
|
|
|
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
|
|
|
|
|
|
|
|
// SVE2 complex integer dot product
|
|
|
|
defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
|
2019-05-16 17:42:22 +08:00
|
|
|
|
|
|
|
// SVE2 complex integer multiply-add (indexed)
|
|
|
|
defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla">;
|
|
|
|
// SVE2 complex saturating multiply-add (indexed)
|
|
|
|
defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;
|
|
|
|
|
|
|
|
// SVE2 complex integer multiply-add
|
|
|
|
defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla">;
|
|
|
|
defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
|
2019-05-17 17:04:44 +08:00
|
|
|
|
|
|
|
// SVE2 integer multiply long (indexed)
|
|
|
|
defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
|
|
|
|
defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt">;
|
|
|
|
defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb">;
|
|
|
|
defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt">;
|
|
|
|
|
|
|
|
// SVE2 saturating multiply (indexed)
|
|
|
|
defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb">;
|
|
|
|
defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">;
|
|
|
|
|
2019-05-17 17:19:41 +08:00
|
|
|
// SVE2 integer multiply-add long (indexed)
|
|
|
|
defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb">;
|
|
|
|
defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt">;
|
|
|
|
defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb">;
|
|
|
|
defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt">;
|
|
|
|
defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb">;
|
|
|
|
defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt">;
|
|
|
|
defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb">;
|
|
|
|
defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt">;
|
|
|
|
|
|
|
|
// SVE2 integer multiply-add long (vectors, unpredicated)
|
|
|
|
defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb">;
|
|
|
|
defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt">;
|
|
|
|
defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb">;
|
|
|
|
defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt">;
|
|
|
|
defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb">;
|
|
|
|
defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt">;
|
|
|
|
defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb">;
|
|
|
|
defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt">;
|
|
|
|
|
2019-05-17 17:29:43 +08:00
|
|
|
// SVE2 saturating multiply-add long (indexed)
|
|
|
|
defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb">;
|
|
|
|
defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt">;
|
|
|
|
defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb">;
|
|
|
|
defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt">;
|
|
|
|
|
|
|
|
// SVE2 saturating multiply-add long (vectors, unpredicated)
|
|
|
|
defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb">;
|
|
|
|
defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt">;
|
|
|
|
defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb">;
|
|
|
|
defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt">;
|
|
|
|
|
2019-05-20 18:29:48 +08:00
|
|
|
// SVE2 saturating multiply-add interleaved long
|
|
|
|
defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt">;
|
|
|
|
defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
|
|
|
|
|
2019-05-20 18:35:23 +08:00
|
|
|
// SVE2 integer halving add/subtract (predicated)
|
|
|
|
defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd">;
|
|
|
|
defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd">;
|
|
|
|
defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub">;
|
|
|
|
defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub">;
|
|
|
|
defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">;
|
|
|
|
defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">;
|
|
|
|
defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">;
|
|
|
|
defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">;
|
|
|
|
|
2019-05-20 21:50:15 +08:00
|
|
|
// SVE2 integer pairwise add and accumulate long
|
|
|
|
defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">;
|
|
|
|
defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">;
|
|
|
|
|
2019-05-21 16:59:00 +08:00
|
|
|
// SVE2 integer pairwise arithmetic
|
|
|
|
defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp">;
|
|
|
|
defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">;
|
|
|
|
defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">;
|
|
|
|
defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">;
|
|
|
|
defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">;
|
|
|
|
|
2019-05-21 17:06:51 +08:00
|
|
|
// SVE2 integer unary operations (predicated)
|
|
|
|
defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
|
|
|
|
defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte">;
|
|
|
|
defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs">;
|
|
|
|
defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
|
|
|
|
|
2019-05-24 17:06:37 +08:00
|
|
|
// SVE2 saturating add/subtract
|
|
|
|
defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd">;
|
|
|
|
defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd">;
|
|
|
|
defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub">;
|
|
|
|
defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub">;
|
|
|
|
defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
|
|
|
|
defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
|
|
|
|
defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
|
|
|
|
defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
|
|
|
|
|
[AArch64][SVE2] Asm: add various bitwise shift instructions
Summary:
This patch adds support for the SVE2 saturating/rounding bitwise shift
left (predicated) group of instructions:
* SRSHL, URSHL, SRSHLR, URSHLR, SQSHL, UQSHL, SQRSHL, UQRSHL,
SQSHLR, UQSHLR, SQRSHLR, UQRSHLR
Immediate forms of the SQSHL and UQSHL instructions are also added to
the existing SVE bitwise shift by immediate (predicated) group, as well
as three new instructions SRSHR/URSHR/SQSHLU. The new instructions in
this group are encoded similarly and are implemented using the same
TableGen class with a minimal change (1 bit in encoding).
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62140
llvm-svn: 361612
2019-05-24 17:17:23 +08:00
|
|
|
// SVE2 saturating/rounding bitwise shift left (predicated)
|
|
|
|
defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl">;
|
|
|
|
defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl">;
|
|
|
|
defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr">;
|
|
|
|
defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr">;
|
|
|
|
defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl">;
|
|
|
|
defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl">;
|
|
|
|
defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl">;
|
|
|
|
defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl">;
|
|
|
|
defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr">;
|
|
|
|
defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr">;
|
|
|
|
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
|
|
|
|
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
|
|
|
|
|
2019-07-31 16:58:16 +08:00
|
|
|
// SVE2 predicated shifts
|
|
|
|
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
|
|
|
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
|
|
|
|
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
|
|
|
|
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
|
|
|
|
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
|
|
|
|
|
[AArch64][SVE2] Asm: add integer add/sub long/wide instructions
Summary:
Patch adds support for the following instructions:
SVE2 integer add/subtract long:
* SADDLB, SADDLT, UADDLB, UADDLT, SSUBLB, SSUBLT, USUBLB, USUBLT,
SABDLB, SABDLT, UABDLB, UABDLT
SVE2 integer add/subtract wide:
* SADDWB, SADDWT, UADDWB, UADDWT, SSUBWB, SSUBWT, USUBWB, USUBWT
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62142
llvm-svn: 361615
2019-05-24 17:28:27 +08:00
|
|
|
// SVE2 integer add/subtract long
|
|
|
|
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
|
|
|
|
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
|
|
|
|
defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb">;
|
|
|
|
defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt">;
|
|
|
|
defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb">;
|
|
|
|
defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt">;
|
|
|
|
defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb">;
|
|
|
|
defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt">;
|
|
|
|
defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb">;
|
|
|
|
defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt">;
|
|
|
|
defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb">;
|
|
|
|
defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt">;
|
|
|
|
|
|
|
|
// SVE2 integer add/subtract wide
|
|
|
|
defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
|
|
|
|
defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">;
|
|
|
|
defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">;
|
|
|
|
defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">;
|
|
|
|
defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">;
|
|
|
|
defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">;
|
|
|
|
defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">;
|
|
|
|
defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
|
|
|
|
|
2019-05-17 17:04:44 +08:00
|
|
|
// SVE2 integer multiply long
|
|
|
|
defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
|
|
|
|
defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
|
|
|
|
defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb">;
|
|
|
|
defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt">;
|
|
|
|
defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb">;
|
|
|
|
defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt">;
|
2019-05-24 17:56:23 +08:00
|
|
|
defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb">;
|
|
|
|
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
|
[AArch64][SVE2] Asm: add various bitwise shift instructions
Summary:
This patch adds support for the SVE2 saturating/rounding bitwise shift
left (predicated) group of instructions:
* SRSHL, URSHL, SRSHLR, URSHLR, SQSHL, UQSHL, SQRSHL, UQRSHL,
SQSHLR, UQSHLR, SQRSHLR, UQRSHLR
Immediate forms of the SQSHL and UQSHL instructions are also added to
the existing SVE bitwise shift by immediate (predicated) group, as well
as three new instructions SRSHR/URSHR/SQSHLU. The new instructions in
this group are encoded similarly and are implemented using the same
TableGen class with a minimal change (1 bit in encoding).
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62140
llvm-svn: 361612
2019-05-24 17:17:23 +08:00
|
|
|
|
[AArch64][SVE2] Asm: support SVE2 Accumulate Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise shift and insert:
* SRI, SLI
SVE2 bitwise shift right and accumulate:
* SSRA, USRA, SRSRA, URSRA
SVE2 complex integer add:
* CADD, SQCADD
SVE2 integer absolute difference and accumulate:
* SABA, UABA
SVE2 integer absolute difference and accumulate long:
* SABALB, SABALT, UABALB, UABALT
SVE2 integer add/subtract long with carry:
* ADCLB, ADCLT, SBCLB, SBCLT
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62204
llvm-svn: 361622
2019-05-24 18:10:34 +08:00
|
|
|
// SVE2 bitwise shift and insert
|
2019-07-31 16:45:57 +08:00
|
|
|
defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
|
|
|
|
defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
|
[AArch64][SVE2] Asm: support SVE2 Accumulate Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise shift and insert:
* SRI, SLI
SVE2 bitwise shift right and accumulate:
* SSRA, USRA, SRSRA, URSRA
SVE2 complex integer add:
* CADD, SQCADD
SVE2 integer absolute difference and accumulate:
* SABA, UABA
SVE2 integer absolute difference and accumulate long:
* SABALB, SABALT, UABALB, UABALT
SVE2 integer add/subtract long with carry:
* ADCLB, ADCLT, SBCLB, SBCLT
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62204
llvm-svn: 361622
2019-05-24 18:10:34 +08:00
|
|
|
|
|
|
|
// SVE2 bitwise shift right and accumulate
|
2019-07-31 16:58:16 +08:00
|
|
|
defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
|
|
|
|
defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
|
|
|
|
defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
|
|
|
|
defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
|
[AArch64][SVE2] Asm: support SVE2 Accumulate Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise shift and insert:
* SRI, SLI
SVE2 bitwise shift right and accumulate:
* SSRA, USRA, SRSRA, URSRA
SVE2 complex integer add:
* CADD, SQCADD
SVE2 integer absolute difference and accumulate:
* SABA, UABA
SVE2 integer absolute difference and accumulate long:
* SABALB, SABALT, UABALB, UABALT
SVE2 integer add/subtract long with carry:
* ADCLB, ADCLT, SBCLB, SBCLT
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62204
llvm-svn: 361622
2019-05-24 18:10:34 +08:00
|
|
|
|
|
|
|
// SVE2 complex integer add
|
|
|
|
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
|
|
|
|
defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
|
|
|
|
|
|
|
|
// SVE2 integer absolute difference and accumulate
|
|
|
|
defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
|
|
|
|
defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;
|
|
|
|
|
|
|
|
// SVE2 integer absolute difference and accumulate long
|
|
|
|
defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
|
|
|
|
defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
|
|
|
|
defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
|
|
|
|
defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;
|
|
|
|
|
|
|
|
// SVE2 integer add/subtract long with carry
|
|
|
|
defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
|
|
|
|
defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt">;
|
|
|
|
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
|
|
|
|
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
|
|
|
|
|
2019-07-31 16:45:57 +08:00
|
|
|
// SVE2 bitwise shift right narrow (bottom)
|
|
|
|
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
|
|
|
|
defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
|
|
|
|
defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
|
|
|
|
defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
|
|
|
|
defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
|
|
|
|
defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
|
|
|
|
defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
|
|
|
|
defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
|
|
|
|
|
|
|
|
// SVE2 bitwise shift right narrow (top)
|
|
|
|
defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
|
|
|
|
defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
|
|
|
|
defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
|
|
|
|
defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
|
|
|
|
defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
|
|
|
|
defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
|
|
|
|
defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
|
|
|
|
defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
|
|
|
|
|
|
|
|
// SVE2 integer add/subtract narrow high part (bottom)
|
|
|
|
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
|
|
|
|
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
|
|
|
|
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
|
|
|
|
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
|
|
|
|
|
|
|
|
// SVE2 integer add/subtract narrow high part (top)
|
|
|
|
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
|
|
|
|
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
|
|
|
|
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
|
|
|
|
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
|
|
|
|
|
|
|
|
// SVE2 saturating extract narrow (bottom)
|
|
|
|
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
|
|
|
|
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
|
|
|
|
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
|
|
|
|
|
|
|
|
// SVE2 saturating extract narrow (top)
|
|
|
|
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
|
|
|
|
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
|
|
|
|
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
|
[AArch64][SVE2] Asm: support SVE2 Narrowing Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise shift right narrow:
* SQSHRUNB, SQSHRUNT, SQRSHRUNB, SQRSHRUNT, SHRNB, SHRNT, RSHRNB, RSHRNT,
SQSHRNB, SQSHRNT, SQRSHRNB, SQRSHRNT, UQSHRNB, UQSHRNT, UQRSHRNB,
UQRSHRNT
SVE2 integer add/subtract narrow high part:
* ADDHNB, ADDHNT, RADDHNB, RADDHNT, SUBHNB, SUBHNT, RSUBHNB, RSUBHNT
SVE2 saturating extract narrow:
* SQXTNB, SQXTNT, UQXTNB, UQXTNT, SQXTUNB, SQXTUNT
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D62205
llvm-svn: 361624
2019-05-24 18:22:30 +08:00
|
|
|
|
2019-05-24 18:32:01 +08:00
|
|
|
// SVE2 character match
|
|
|
|
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
|
|
|
|
defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
|
|
|
|
|
[AArch64][SVE2] Asm: support SVE2 Misc Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise exclusive-or interleaved:
* EORBT, EORTB
SVE2 bitwise permute:
* BEXT, BDEP, BGRP
SVE2 bitwise shift left long:
* SSHLLB, SSHLLT, USHLLB, USHLLT
SVE2 integer add/subtract interleaved long:
* SADDLBT, SSUBLBT, SSUBLTB
BDEP, BEXT and BGRP are enabled with SVE2 feature +bitperm, all other
instructions in this group are enabled with +sve2.
Reviewed By: chill
Differential Revision: https://reviews.llvm.org/D62304
llvm-svn: 361795
2019-05-28 16:42:22 +08:00
|
|
|
// SVE2 bitwise exclusive-or interleaved
|
|
|
|
defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt">;
|
|
|
|
defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">;
|
|
|
|
|
|
|
|
// SVE2 bitwise shift left long
|
|
|
|
defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">;
|
|
|
|
defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">;
|
|
|
|
defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">;
|
|
|
|
defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
|
|
|
|
|
|
|
|
// SVE2 integer add/subtract interleaved long
|
|
|
|
defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
|
|
|
|
defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
|
|
|
|
defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
|
|
|
|
|
2019-05-28 16:51:59 +08:00
|
|
|
// SVE2 histogram generation (segment)
|
|
|
|
def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
|
|
|
|
|
|
|
|
// SVE2 histogram generation (vector)
|
|
|
|
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
|
|
|
|
|
2019-07-31 16:58:16 +08:00
|
|
|
// SVE2 floating-point base 2 logarithm as integer
|
|
|
|
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
|
|
|
|
|
2019-05-28 17:36:52 +08:00
|
|
|
// SVE2 floating-point convert precision
|
|
|
|
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
|
|
|
|
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
|
|
|
|
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
|
2019-07-31 16:58:16 +08:00
|
|
|
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
|
2019-05-28 17:36:52 +08:00
|
|
|
|
2019-05-29 16:40:33 +08:00
|
|
|
// SVE2 floating-point pairwise operations
|
|
|
|
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
|
|
|
|
defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">;
|
|
|
|
defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">;
|
|
|
|
defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp">;
|
|
|
|
defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp">;
|
|
|
|
|
2019-05-29 16:53:06 +08:00
|
|
|
// SVE2 floating-point multiply-add long (indexed)
|
|
|
|
def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">;
|
|
|
|
def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">;
|
|
|
|
def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">;
|
|
|
|
def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">;
|
|
|
|
|
|
|
|
// SVE2 floating-point multiply-add long
|
|
|
|
def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">;
|
|
|
|
def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">;
|
|
|
|
def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
|
|
|
|
def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
|
|
|
|
|
2019-05-29 17:03:27 +08:00
|
|
|
// SVE2 bitwise ternary operations
|
|
|
|
defm EOR3_ZZZZ_D : sve2_int_bitwise_ternary_op<0b000, "eor3">;
|
|
|
|
defm BCAX_ZZZZ_D : sve2_int_bitwise_ternary_op<0b010, "bcax">;
|
|
|
|
def BSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b001, "bsl">;
|
|
|
|
def BSL1N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b011, "bsl1n">;
|
|
|
|
def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
|
|
|
|
def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
|
|
|
|
|
2019-07-31 16:58:16 +08:00
|
|
|
// SVE2 bitwise xor and rotate right by immediate
|
2019-05-29 17:03:27 +08:00
|
|
|
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
|
|
|
|
|
2019-05-30 16:25:17 +08:00
|
|
|
// SVE2 extract vector (immediate offset, constructive)
|
|
|
|
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
|
|
|
|
|
2019-07-31 17:10:36 +08:00
|
|
|
// SVE2 non-temporal gather loads
|
|
|
|
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
|
|
|
|
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
|
|
|
|
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
|
|
|
|
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
|
|
|
|
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
|
|
|
|
|
|
|
|
defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
|
|
|
|
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
|
|
|
|
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
|
|
|
|
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
|
|
|
|
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
|
|
|
|
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
|
|
|
|
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
|
2019-05-30 16:44:27 +08:00
|
|
|
|
2019-05-30 16:51:39 +08:00
|
|
|
// SVE2 vector splice (constructive)
|
|
|
|
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
|
|
|
|
|
2019-07-31 17:10:36 +08:00
|
|
|
// SVE2 non-temporal scatter stores
|
|
|
|
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
|
|
|
|
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
|
|
|
|
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
|
2019-05-31 16:59:40 +08:00
|
|
|
|
2019-07-31 17:10:36 +08:00
|
|
|
defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
|
|
|
|
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
|
|
|
|
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
|
|
|
|
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
|
2019-05-31 17:06:53 +08:00
|
|
|
|
2019-07-31 16:58:16 +08:00
|
|
|
// SVE2 table lookup (three sources)
|
2019-05-31 17:06:53 +08:00
|
|
|
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
|
|
|
|
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
|
2019-05-31 17:13:55 +08:00
|
|
|
|
2019-07-31 16:58:16 +08:00
|
|
|
// SVE2 integer compare scalar count and limit
|
2019-05-31 17:13:55 +08:00
|
|
|
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
|
|
|
|
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
|
|
|
|
defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
|
|
|
|
defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">;
|
|
|
|
|
|
|
|
defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">;
|
|
|
|
defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">;
|
|
|
|
defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
|
|
|
|
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
|
|
|
|
|
2019-07-31 16:58:16 +08:00
|
|
|
// SVE2 pointer conflict compare
|
2019-05-31 17:13:55 +08:00
|
|
|
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
|
|
|
|
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
|
2019-05-14 23:01:00 +08:00
|
|
|
}
|
2019-05-24 17:56:23 +08:00
|
|
|
|
|
|
|
// Every record defined inside this scope gets its Predicates field set to
// [HasSVE2AES] by the enclosing `let`.
let Predicates = [HasSVE2AES] in {
|
2019-05-28 17:13:17 +08:00
|
|
|
// SVE2 crypto destructive binary operations
// AESE and AESD instantiate the same class with the ZPR8 operand; they differ
// only in the 2-bit literal (0b00 / 0b01) and the mnemonic string.
|
|
|
|
def AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8>;
|
|
|
|
def AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8>;
|
|
|
|
|
|
|
|
// SVE2 crypto unary operations
// AESMC and AESIMC differ only in the 1-bit literal (0b0 / 0b1) and mnemonic.
|
|
|
|
def AESMC_ZZ_B : sve2_crypto_unary_op<0b0, "aesmc">;
|
|
|
|
def AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc">;
|
|
|
|
|
2019-05-24 17:56:23 +08:00
|
|
|
// PMULLB and PMULLT instructions which operate with 64-bit source and
|
|
|
|
// 128-bit destination elements are enabled with crypto extensions, similar
|
|
|
|
// to NEON PMULL2 instruction.
// These _Q records reuse the "pmullb"/"pmullt" mnemonics of the PMULLB_ZZZ /
// PMULLT_ZZZ defm instantiations that appear earlier in this file.
// NOTE(review): ZPR128, ZPR64, ZPR64 are presumably destination followed by
// the two sources -- confirm against the sve2_wide_int_arith class definition.
|
|
|
|
def PMULLB_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11010, "pmullb",
|
|
|
|
ZPR128, ZPR64, ZPR64>;
|
|
|
|
def PMULLT_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11011, "pmullt",
|
|
|
|
ZPR128, ZPR64, ZPR64>;
|
|
|
|
}
|
[AArch64][SVE2] Asm: support SVE2 Misc Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise exclusive-or interleaved:
* EORBT, EORTB
SVE2 bitwise permute:
* BEXT, BDEP, BGRP
SVE2 bitwise shift left long:
* SSHLLB, SSHLLT, USHLLB, USHLLT
SVE2 integer add/subtract interleaved long:
* SADDLBT, SSUBLBT, SSUBLTB
BDEP, BEXT and BGRP are enabled with SVE2 feature +bitperm, all other
instructions in this group are enabled with +sve2.
Reviewed By: chill
Differential Revision: https://reviews.llvm.org/D62304
llvm-svn: 361795
2019-05-28 16:42:22 +08:00
|
|
|
|
2019-05-28 17:13:17 +08:00
|
|
|
// Every record defined inside this scope gets its Predicates field set to
// [HasSVE2SM4] by the enclosing `let`.
let Predicates = [HasSVE2SM4] in {
|
|
|
|
// SVE2 crypto constructive binary operations
// Same class as RAX1_ZZZ_D (which passes 0b1 and ZPR64); here the literal is
// 0b0 and the operand is ZPR32.
|
|
|
|
def SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32>;
|
|
|
|
// SVE2 crypto destructive binary operations
// Same class as AESE_ZZZ_B / AESD_ZZZ_B (which pass 0b00 / 0b01 and ZPR8);
// here the literal is 0b10 and the operand is ZPR32.
|
|
|
|
def SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32>;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Every record defined inside this scope gets its Predicates field set to
// [HasSVE2SHA3] by the enclosing `let`.
let Predicates = [HasSVE2SHA3] in {
|
|
|
|
// SVE2 crypto constructive binary operations
// Same class as SM4EKEY_ZZZ_S (which passes 0b0 and ZPR32); here the literal
// is 0b1 and the operand is ZPR64.
|
|
|
|
def RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64>;
|
|
|
|
}
|
|
|
|
|
[AArch64][SVE2] Asm: support SVE2 Misc Group
Summary:
Patch adds support for the following instructions:
SVE2 bitwise exclusive-or interleaved:
* EORBT, EORTB
SVE2 bitwise permute:
* BEXT, BDEP, BGRP
SVE2 bitwise shift left long:
* SSHLLB, SSHLLT, USHLLB, USHLLT
SVE2 integer add/subtract interleaved long:
* SADDLBT, SSUBLBT, SSUBLTB
BDEP, BEXT and BGRP are enabled with SVE2 feature +bitperm, all other
instructions in this group are enabled with +sve2.
Reviewed By: chill
Differential Revision: https://reviews.llvm.org/D62304
llvm-svn: 361795
2019-05-28 16:42:22 +08:00
|
|
|
// Every record defined inside this scope gets its Predicates field set to
// [HasSVE2BitPerm] by the enclosing `let`.
let Predicates = [HasSVE2BitPerm] in {
|
|
|
|
// SVE2 bitwise permute
// BEXT, BDEP and BGRP are `defm` instantiations of one multiclass, so each
// line may expand to several records. They differ only in the 4-bit literal
// (0b1100 / 0b1101 / 0b1110) and the mnemonic string.
// NOTE(review): presumably one record per element size -- confirm against
// the sve2_misc_bitwise multiclass definition.
|
|
|
|
defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext">;
|
|
|
|
defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep">;
|
|
|
|
defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp">;
|
|
|
|
}
|