forked from OSchip/llvm-project
[AArch64][SVE] Add SVE2 intrinsics for pairwise arithmetic
Summary:
Implements the following intrinsics:
- addp
- smaxp, sminp, umaxp & uminp
- sadalp & uadalp

Reviewers: dancgr, efriedma, sdesmalen, c-rhodes

Reviewed By: c-rhodes

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73347
This commit is contained in:
parent
7116e431c0
commit
bd33a46213
|
@ -1032,6 +1032,13 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
|
||||||
LLVMVectorOfBitcastsToInt<0>],
|
LLVMVectorOfBitcastsToInt<0>],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
|
class SVE2_2VectorArg_Pred_Long_Intrinsic
|
||||||
|
: Intrinsic<[llvm_anyvector_ty],
|
||||||
|
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||||
|
LLVMMatchType<0>,
|
||||||
|
LLVMSubdivide2VectorType<0>],
|
||||||
|
[IntrNoMem]>;
|
||||||
|
|
||||||
class SVE2_3VectorArg_Long_Intrinsic
|
class SVE2_3VectorArg_Long_Intrinsic
|
||||||
: Intrinsic<[llvm_anyvector_ty],
|
: Intrinsic<[llvm_anyvector_ty],
|
||||||
[LLVMMatchType<0>,
|
[LLVMMatchType<0>,
|
||||||
|
@ -1662,11 +1669,23 @@ def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_
|
||||||
// SVE2 - Non-widening pairwise arithmetic
|
// SVE2 - Non-widening pairwise arithmetic
|
||||||
//
|
//
|
||||||
|
|
||||||
|
def int_aarch64_sve_addp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
|
def int_aarch64_sve_smaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
|
def int_aarch64_sve_sminp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
|
def int_aarch64_sve_umaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
|
def int_aarch64_sve_uminp : AdvSIMD_Pred2VectorArg_Intrinsic;
|
||||||
|
|
||||||
|
//
|
||||||
|
// SVE2 - Widening pairwise arithmetic
|
||||||
|
//
|
||||||
|
|
||||||
|
def int_aarch64_sve_sadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
|
||||||
|
def int_aarch64_sve_uadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
|
||||||
|
|
||||||
//
|
//
|
||||||
// SVE2 - Floating-point widening multiply-accumulate
|
// SVE2 - Floating-point widening multiply-accumulate
|
||||||
|
|
|
@ -1503,25 +1503,25 @@ let Predicates = [HasSVE2] in {
|
||||||
defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
|
defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
|
||||||
|
|
||||||
// SVE2 integer halving add/subtract (predicated)
|
// SVE2 integer halving add/subtract (predicated)
|
||||||
defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd">;
|
defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", null_frag>;
|
||||||
defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd">;
|
defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", null_frag>;
|
||||||
defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub">;
|
defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", null_frag>;
|
||||||
defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub">;
|
defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", null_frag>;
|
||||||
defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">;
|
defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", null_frag>;
|
||||||
defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">;
|
defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", null_frag>;
|
||||||
defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">;
|
defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", null_frag>;
|
||||||
defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">;
|
defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", null_frag>;
|
||||||
|
|
||||||
// SVE2 integer pairwise add and accumulate long
|
// SVE2 integer pairwise add and accumulate long
|
||||||
defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">;
|
defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>;
|
||||||
defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">;
|
defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp", int_aarch64_sve_uadalp>;
|
||||||
|
|
||||||
// SVE2 integer pairwise arithmetic
|
// SVE2 integer pairwise arithmetic
|
||||||
defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp">;
|
defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp", int_aarch64_sve_addp>;
|
||||||
defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">;
|
defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp", int_aarch64_sve_smaxp>;
|
||||||
defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">;
|
defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp", int_aarch64_sve_umaxp>;
|
||||||
defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">;
|
defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp", int_aarch64_sve_sminp>;
|
||||||
defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">;
|
defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>;
|
||||||
|
|
||||||
// SVE2 integer unary operations (predicated)
|
// SVE2 integer unary operations (predicated)
|
||||||
defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
|
defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
|
||||||
|
@ -1530,28 +1530,28 @@ let Predicates = [HasSVE2] in {
|
||||||
defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
|
defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
|
||||||
|
|
||||||
// SVE2 saturating add/subtract
|
// SVE2 saturating add/subtract
|
||||||
defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd">;
|
defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>;
|
||||||
defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd">;
|
defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", null_frag>;
|
||||||
defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub">;
|
defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", null_frag>;
|
||||||
defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub">;
|
defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", null_frag>;
|
||||||
defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
|
defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", null_frag>;
|
||||||
defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
|
defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", null_frag>;
|
||||||
defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
|
defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", null_frag>;
|
||||||
defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
|
defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", null_frag>;
|
||||||
|
|
||||||
// SVE2 saturating/rounding bitwise shift left (predicated)
|
// SVE2 saturating/rounding bitwise shift left (predicated)
|
||||||
defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl">;
|
defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", null_frag>;
|
||||||
defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl">;
|
defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", null_frag>;
|
||||||
defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr">;
|
defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
|
||||||
defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr">;
|
defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
|
||||||
defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl">;
|
defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", null_frag>;
|
||||||
defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl">;
|
defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", null_frag>;
|
||||||
defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl">;
|
defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", null_frag>;
|
||||||
defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl">;
|
defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", null_frag>;
|
||||||
defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr">;
|
defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
|
||||||
defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr">;
|
defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
|
||||||
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
|
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
|
||||||
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
|
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
|
||||||
|
|
||||||
// SVE2 predicated shifts
|
// SVE2 predicated shifts
|
||||||
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
||||||
|
|
|
@ -2716,7 +2716,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
|
||||||
bits<5> Zdn;
|
bits<5> Zdn;
|
||||||
let Inst{31-24} = 0b01000100;
|
let Inst{31-24} = 0b01000100;
|
||||||
let Inst{23-22} = sz;
|
let Inst{23-22} = sz;
|
||||||
let Inst{21} = 0b0;
|
let Inst{21-20} = 0b01;
|
||||||
let Inst{20-16} = opc{5-1};
|
let Inst{20-16} = opc{5-1};
|
||||||
let Inst{15-14} = 0b10;
|
let Inst{15-14} = 0b10;
|
||||||
let Inst{13} = opc{0};
|
let Inst{13} = opc{0};
|
||||||
|
@ -2729,11 +2729,16 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
|
||||||
let ElementSize = zprty.ElementSize;
|
let ElementSize = zprty.ElementSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass sve2_int_arith_pred<bits<6> opc, string asm> {
|
multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op> {
|
||||||
def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
|
def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
|
||||||
def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
|
def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
|
||||||
def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
|
def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
|
||||||
def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
|
def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
|
||||||
|
|
||||||
|
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
|
||||||
|
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
|
||||||
|
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
|
||||||
|
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
|
class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
|
||||||
|
@ -2757,10 +2762,14 @@ class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
|
||||||
let ElementSize = zprty1.ElementSize;
|
let ElementSize = zprty1.ElementSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm> {
|
multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperator op> {
|
||||||
def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>;
|
def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>;
|
||||||
def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>;
|
def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>;
|
||||||
def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>;
|
def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>;
|
||||||
|
|
||||||
|
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv16i8, !cast<Instruction>(NAME # _H)>;
|
||||||
|
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv8i16, !cast<Instruction>(NAME # _S)>;
|
||||||
|
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv4i32, !cast<Instruction>(NAME # _D)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
|
class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
|
||||||
|
|
|
@ -1,5 +1,49 @@
|
||||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
|
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
|
||||||
|
|
||||||
|
;
|
||||||
|
; ADDP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 16 x i8> @addp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: addp_i8:
|
||||||
|
; CHECK: addp z0.b, p0/m, z0.b, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.addp.nxv16i8(<vscale x 16 x i1> %pg,
|
||||||
|
<vscale x 16 x i8> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 16 x i8> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @addp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: addp_i16:
|
||||||
|
; CHECK: addp z0.h, p0/m, z0.h, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.addp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @addp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: addp_i32:
|
||||||
|
; CHECK: addp z0.s, p0/m, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @addp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
|
||||||
|
; CHECK-LABEL: addp_i64:
|
||||||
|
; CHECK: addp z0.d, p0/m, z0.d, z1.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.addp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 2 x i64> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
;
|
;
|
||||||
; FADDP
|
; FADDP
|
||||||
;
|
;
|
||||||
|
@ -170,6 +214,187 @@ define <vscale x 2 x double> @fminnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
|
||||||
ret <vscale x 2 x double> %out
|
ret <vscale x 2 x double> %out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;
|
||||||
|
; SMAXP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 16 x i8> @smaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: smaxp_i8:
|
||||||
|
; CHECK: smaxp z0.b, p0/m, z0.b, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.smaxp.nxv16i8(<vscale x 16 x i1> %pg,
|
||||||
|
<vscale x 16 x i8> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 16 x i8> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @smaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: smaxp_i16:
|
||||||
|
; CHECK: smaxp z0.h, p0/m, z0.h, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.smaxp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @smaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: smaxp_i32:
|
||||||
|
; CHECK: smaxp z0.s, p0/m, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @smaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
|
||||||
|
; CHECK-LABEL: smaxp_i64:
|
||||||
|
; CHECK: smaxp z0.d, p0/m, z0.d, z1.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.smaxp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 2 x i64> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
;
|
||||||
|
; SMINP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 16 x i8> @sminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: sminp_i8:
|
||||||
|
; CHECK: sminp z0.b, p0/m, z0.b, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.sminp.nxv16i8(<vscale x 16 x i1> %pg,
|
||||||
|
<vscale x 16 x i8> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 16 x i8> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @sminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: sminp_i16:
|
||||||
|
; CHECK: sminp z0.h, p0/m, z0.h, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.sminp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @sminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: sminp_i32:
|
||||||
|
; CHECK: sminp z0.s, p0/m, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @sminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
|
||||||
|
; CHECK-LABEL: sminp_i64:
|
||||||
|
; CHECK: sminp z0.d, p0/m, z0.d, z1.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sminp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 2 x i64> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
;
|
||||||
|
; UMINP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 16 x i8> @uminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: uminp_i8:
|
||||||
|
; CHECK: uminp z0.b, p0/m, z0.b, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.uminp.nxv16i8(<vscale x 16 x i1> %pg,
|
||||||
|
<vscale x 16 x i8> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 16 x i8> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @uminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: uminp_i16:
|
||||||
|
; CHECK: uminp z0.h, p0/m, z0.h, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.uminp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @uminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: uminp_i32:
|
||||||
|
; CHECK: uminp z0.s, p0/m, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @uminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
|
||||||
|
; CHECK-LABEL: uminp_i64:
|
||||||
|
; CHECK: uminp z0.d, p0/m, z0.d, z1.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.uminp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 2 x i64> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
;
|
||||||
|
; UMAXP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 16 x i8> @umaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: umaxp_i8:
|
||||||
|
; CHECK: umaxp z0.b, p0/m, z0.b, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.umaxp.nxv16i8(<vscale x 16 x i1> %pg,
|
||||||
|
<vscale x 16 x i8> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 16 x i8> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @umaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: umaxp_i16:
|
||||||
|
; CHECK: umaxp z0.h, p0/m, z0.h, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.umaxp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @umaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: umaxp_i32:
|
||||||
|
; CHECK: umaxp z0.s, p0/m, z0.s, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @umaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
|
||||||
|
; CHECK-LABEL: umaxp_i64:
|
||||||
|
; CHECK: umaxp z0.d, p0/m, z0.d, z1.d
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.umaxp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 2 x i64> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <vscale x 16 x i8> @llvm.aarch64.sve.addp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.addp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.addp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||||
|
|
||||||
declare <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
declare <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||||
declare <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
declare <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||||
declare <vscale x 2 x double> @llvm.aarch64.sve.faddp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
declare <vscale x 2 x double> @llvm.aarch64.sve.faddp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||||
|
@ -189,3 +414,23 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fminp.nxv2f64(<vscale x 2 x i1>,
|
||||||
declare <vscale x 8 x half> @llvm.aarch64.sve.fminnmp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
declare <vscale x 8 x half> @llvm.aarch64.sve.fminnmp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||||
declare <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
declare <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||||
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnmp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnmp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||||
|
|
||||||
|
declare <vscale x 16 x i8> @llvm.aarch64.sve.smaxp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.smaxp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.smaxp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||||
|
|
||||||
|
declare <vscale x 16 x i8> @llvm.aarch64.sve.sminp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.sminp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.sminp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||||
|
|
||||||
|
declare <vscale x 16 x i8> @llvm.aarch64.sve.umaxp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.umaxp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.umaxp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||||
|
|
||||||
|
declare <vscale x 16 x i8> @llvm.aarch64.sve.uminp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.uminp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.uminp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
|
||||||
|
|
||||||
|
;
|
||||||
|
; SADALP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @sadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: sadalp_i8:
|
||||||
|
; CHECK: sadalp z0.h, p0/m, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @sadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: sadalp_i16:
|
||||||
|
; CHECK: sadalp z0.s, p0/m, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @sadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: sadalp_i32:
|
||||||
|
; CHECK: sadalp z0.d, p0/m, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
;
|
||||||
|
; UADALP
|
||||||
|
;
|
||||||
|
|
||||||
|
define <vscale x 8 x i16> @uadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
|
||||||
|
; CHECK-LABEL: uadalp_i8:
|
||||||
|
; CHECK: uadalp z0.h, p0/m, z1.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1> %pg,
|
||||||
|
<vscale x 8 x i16> %a,
|
||||||
|
<vscale x 16 x i8> %b)
|
||||||
|
ret <vscale x 8 x i16> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 4 x i32> @uadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
|
||||||
|
; CHECK-LABEL: uadalp_i16:
|
||||||
|
; CHECK: uadalp z0.s, p0/m, z1.h
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1> %pg,
|
||||||
|
<vscale x 4 x i32> %a,
|
||||||
|
<vscale x 8 x i16> %b)
|
||||||
|
ret <vscale x 4 x i32> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
define <vscale x 2 x i64> @uadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
|
||||||
|
; CHECK-LABEL: uadalp_i32:
|
||||||
|
; CHECK: uadalp z0.d, p0/m, z1.s
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1> %pg,
|
||||||
|
<vscale x 2 x i64> %a,
|
||||||
|
<vscale x 4 x i32> %b)
|
||||||
|
ret <vscale x 2 x i64> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
|
||||||
|
|
||||||
|
declare <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
|
||||||
|
declare <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
|
||||||
|
declare <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
|
Loading…
Reference in New Issue