forked from OSchip/llvm-project
[AArch64][SVE] Generate ASRD instructions for power of 2 signed divides
Differential Revision: https://reviews.llvm.org/D113281
This commit is contained in:
parent
c76d6dd192
commit
eafbaca977
|
@ -1949,6 +1949,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
MAKE_CASE(AArch64ISD::UDIV_PRED)
|
||||
MAKE_CASE(AArch64ISD::UMAX_PRED)
|
||||
MAKE_CASE(AArch64ISD::UMIN_PRED)
|
||||
MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
|
||||
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
|
||||
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
|
||||
|
@ -2315,6 +2316,8 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
|
|||
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
|
||||
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
|
||||
SelectionDAG &DAG);
|
||||
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
|
||||
EVT VT);
|
||||
|
||||
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
|
||||
static bool isZerosVector(const SDNode *N) {
|
||||
|
@ -10962,8 +10965,40 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// isPow2Splat - Check whether \p Op is a constant splat whose signed element
/// value is a power of two or the negation of a power of two.
///
/// \param Op       The candidate divisor: an AArch64ISD::DUP, ISD::SPLAT_VECTOR
///                 or ISD::BUILD_VECTOR node.
/// \param SplatVal On success, receives the magnitude of the splatted constant
///                 (always a power of two).
/// \param Negated  On success, true if the splatted constant was negative.
/// \returns true if \p Op matched. \p SplatVal and \p Negated are only
///          meaningful when the function returns true.
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
  const unsigned Opc = Op.getOpcode();
  const bool IsBuildVector = Opc == ISD::BUILD_VECTOR;
  if (!IsBuildVector && Opc != AArch64ISD::DUP && Opc != ISD::SPLAT_VECTOR)
    return false;

  if (IsBuildVector) {
    // A BUILD_VECTOR must be accepted by isAllConstantBuildVector.
    if (!isAllConstantBuildVector(Op, SplatVal))
      return false;
  } else if (!isa<ConstantSDNode>(Op->getOperand(0))) {
    // DUP / SPLAT_VECTOR must splat a constant scalar.
    return false;
  }

  // The scalar operand may be wider than the vector element (e.g. an i32
  // operand feeding i8 lanes), in which case only the low element-sized bits
  // are significant. Interpret those bits as a signed element value rather
  // than assuming a fixed 32-bit operand: this is independent of how the
  // upper bits were filled in when the operand was promoted.
  const unsigned EltBits = Op.getValueType().getScalarSizeInBits();
  const int64_t SignedVal =
      SignExtend64(Op->getConstantOperandVal(0), EltBits);

  // Derive the sign from the signed value so that the minimum signed value
  // (e.g. INT64_MIN, whose bit pattern is itself a power of two) is treated
  // as -(2^(EltBits-1)) and not as a positive power of two.
  Negated = SignedVal < 0;

  // Negate in unsigned arithmetic so INT64_MIN maps to 2^63 without signed
  // overflow.
  const uint64_t Magnitude = Negated ? 0 - static_cast<uint64_t>(SignedVal)
                                     : static_cast<uint64_t>(SignedVal);
  if (!isPowerOf2_64(Magnitude))
    return false;

  SplatVal = Magnitude;
  return true;
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
|
||||
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
|
||||
|
@ -10973,6 +11008,19 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
|
|||
bool Signed = Op.getOpcode() == ISD::SDIV;
|
||||
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
|
||||
|
||||
bool Negated;
|
||||
uint64_t SplatVal;
|
||||
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
|
||||
SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
|
||||
SDValue Res =
|
||||
DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
|
||||
DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
|
||||
if (Negated)
|
||||
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
|
||||
|
||||
return Res;
|
||||
}
|
||||
|
||||
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
|
||||
return LowerToPredicatedOp(Op, DAG, PredOpcode);
|
||||
|
||||
|
@ -10986,7 +11034,6 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
|
|||
else
|
||||
llvm_unreachable("Unexpected Custom DIV operation");
|
||||
|
||||
SDLoc dl(Op);
|
||||
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
|
||||
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
|
||||
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
|
||||
|
@ -12978,8 +13025,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
|
|||
if (isIntDivCheap(N->getValueType(0), Attr))
|
||||
return SDValue(N,0); // Lower SDIV as SDIV
|
||||
|
||||
// fold (sdiv X, pow2)
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
// For scalable and fixed types, mark them as cheap so we can handle it much
|
||||
// later. This allows us to handle larger than legal types.
|
||||
if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
|
||||
return SDValue(N, 0);
|
||||
|
||||
// fold (sdiv X, pow2)
|
||||
if ((VT != MVT::i32 && VT != MVT::i64) ||
|
||||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
|
||||
return SDValue();
|
||||
|
@ -15078,6 +15131,9 @@ static SDValue performIntrinsicCombine(SDNode *N,
|
|||
case Intrinsic::aarch64_sve_uqsub_x:
|
||||
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
|
||||
N->getOperand(1), N->getOperand(2));
|
||||
case Intrinsic::aarch64_sve_asrd:
|
||||
return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
|
||||
N->getOperand(1), N->getOperand(2), N->getOperand(3));
|
||||
case Intrinsic::aarch64_sve_cmphs:
|
||||
if (!N->getOperand(2).getValueType().isFloatingPoint())
|
||||
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
|
||||
|
@ -18696,6 +18752,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
|
|||
bool Signed = Op.getOpcode() == ISD::SDIV;
|
||||
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
|
||||
|
||||
bool Negated;
|
||||
uint64_t SplatVal;
|
||||
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
|
||||
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
|
||||
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
|
||||
SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
|
||||
|
||||
SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
|
||||
SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
|
||||
if (Negated)
|
||||
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
|
||||
|
||||
return convertFromScalableVector(DAG, VT, Res);
|
||||
}
|
||||
|
||||
// Scalable vector i32/i64 DIV is supported.
|
||||
if (EltVT == MVT::i32 || EltVT == MVT::i64)
|
||||
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
|
||||
|
|
|
@ -104,6 +104,8 @@ enum NodeType : unsigned {
|
|||
// Unpredicated vector instructions
|
||||
BIC,
|
||||
|
||||
SRAD_MERGE_OP1,
|
||||
|
||||
// Predicated instructions with the result of inactive lanes provided by the
|
||||
// last operand.
|
||||
FABS_MERGE_PASSTHRU,
|
||||
|
|
|
@ -199,6 +199,13 @@ def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
|
|||
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
|
||||
def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;
|
||||
|
||||
def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
|
||||
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
|
||||
]>;
|
||||
|
||||
def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;
|
||||
|
||||
def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
|
||||
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
|
||||
|
@ -1575,7 +1582,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
|
|||
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
|
||||
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
|
||||
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
|
||||
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
|
||||
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>;
|
||||
|
||||
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
|
||||
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
|
||||
|
@ -1586,7 +1593,7 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
|
|||
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
|
||||
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
|
||||
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
|
||||
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
|
||||
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
|
||||
} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
|
||||
|
||||
let Predicates = [HasSVEorStreamingSVE] in {
|
||||
|
|
|
@ -0,0 +1,389 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
|
||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v8i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
|
||||
ret <8 x i8> %res
|
||||
}
|
||||
|
||||
define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v16i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define void @sdiv_v32i8(<32 x i8>* %a) #0 {
|
||||
; CHECK-LABEL: sdiv_v32i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.b, vl32
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <32 x i8>, <32 x i8>* %a
|
||||
%res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
|
||||
store <32 x i8> %res, <32 x i8>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v64i8(<64 x i8>* %a) #0 {
|
||||
; VBITS_EQ_256-LABEL: sdiv_v64i8:
|
||||
; VBITS_EQ_256: // %bb.0:
|
||||
; VBITS_EQ_256-NEXT: mov w8, #32
|
||||
; VBITS_EQ_256-NEXT: ptrue p0.b, vl32
|
||||
; VBITS_EQ_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
|
||||
; VBITS_EQ_256-NEXT: ld1b { z1.b }, p0/z, [x0]
|
||||
; VBITS_EQ_256-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; VBITS_EQ_256-NEXT: asrd z1.b, p0/m, z1.b, #5
|
||||
; VBITS_EQ_256-NEXT: st1b { z0.b }, p0, [x0, x8]
|
||||
; VBITS_EQ_256-NEXT: st1b { z1.b }, p0, [x0]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
;
|
||||
; VBITS_GE_512-LABEL: sdiv_v64i8:
|
||||
; VBITS_GE_512: // %bb.0:
|
||||
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
|
||||
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
%op1 = load <64 x i8>, <64 x i8>* %a
|
||||
%res = sdiv <64 x i8> %op1, shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)
|
||||
store <64 x i8> %res, <64 x i8>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v128i8(<128 x i8>* %a) #0 {
|
||||
; VBITS_GE_1024-LABEL: sdiv_v128i8:
|
||||
; VBITS_GE_1024: // %bb.0:
|
||||
; VBITS_GE_1024-NEXT: ptrue p0.b, vl128
|
||||
; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x0]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <128 x i8>, <128 x i8>* %a
|
||||
%res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
|
||||
store <128 x i8> %res, <128 x i8>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v256i8(<256 x i8>* %a) #0 {
|
||||
; VBITS_GE_2048-LABEL: sdiv_v256i8:
|
||||
; VBITS_GE_2048: // %bb.0:
|
||||
; VBITS_GE_2048-NEXT: ptrue p0.b, vl256
|
||||
; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: asrd z0.b, p0/m, z0.b, #5
|
||||
; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x0]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <256 x i8>, <256 x i8>* %a
|
||||
%res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
|
||||
store <256 x i8> %res, <256 x i8>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v4i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
|
||||
ret <4 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v8i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define void @sdiv_v16i16(<16 x i16>* %a) #0 {
|
||||
; CHECK-LABEL: sdiv_v16i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h, vl16
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <16 x i16>, <16 x i16>* %a
|
||||
%res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
|
||||
store <16 x i16> %res, <16 x i16>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v32i16(<32 x i16>* %a) #0 {
|
||||
; VBITS_EQ_256-LABEL: sdiv_v32i16:
|
||||
; VBITS_EQ_256: // %bb.0:
|
||||
; VBITS_EQ_256-NEXT: mov x8, #16
|
||||
; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
|
||||
; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
||||
; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
||||
; VBITS_EQ_256-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; VBITS_EQ_256-NEXT: asrd z1.h, p0/m, z1.h, #5
|
||||
; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
|
||||
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
;
|
||||
; VBITS_GE_512-LABEL: sdiv_v32i16:
|
||||
; VBITS_GE_512: // %bb.0:
|
||||
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
||||
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
%op1 = load <32 x i16>, <32 x i16>* %a
|
||||
%res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)
|
||||
store <32 x i16> %res, <32 x i16>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v64i16(<64 x i16>* %a) #0 {
|
||||
; VBITS_GE_1024-LABEL: sdiv_v64i16:
|
||||
; VBITS_GE_1024: // %bb.0:
|
||||
; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
|
||||
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <64 x i16>, <64 x i16>* %a
|
||||
%res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
|
||||
store <64 x i16> %res, <64 x i16>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v128i16(<128 x i16>* %a) #0 {
|
||||
; VBITS_GE_2048-LABEL: sdiv_v128i16:
|
||||
; VBITS_GE_2048: // %bb.0:
|
||||
; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
|
||||
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: asrd z0.h, p0/m, z0.h, #5
|
||||
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <128 x i16>, <128 x i16>* %a
|
||||
%res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
|
||||
store <128 x i16> %res, <128 x i16>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v2i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
|
||||
ret <2 x i32> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v4i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define void @sdiv_v8i32(<8 x i32>* %a) #0 {
|
||||
; CHECK-LABEL: sdiv_v8i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s, vl8
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <8 x i32>, <8 x i32>* %a
|
||||
%res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
|
||||
store <8 x i32> %res, <8 x i32>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v16i32(<16 x i32>* %a) #0 {
|
||||
; VBITS_EQ_256-LABEL: sdiv_v16i32:
|
||||
; VBITS_EQ_256: // %bb.0:
|
||||
; VBITS_EQ_256-NEXT: mov x8, #8
|
||||
; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
|
||||
; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
||||
; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||
; VBITS_EQ_256-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; VBITS_EQ_256-NEXT: asrd z1.s, p0/m, z1.s, #5
|
||||
; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
|
||||
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
;
|
||||
; VBITS_GE_512-LABEL: sdiv_v16i32:
|
||||
; VBITS_GE_512: // %bb.0:
|
||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
||||
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
%op1 = load <16 x i32>, <16 x i32>* %a
|
||||
%res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)
|
||||
store <16 x i32> %res, <16 x i32>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v32i32(<32 x i32>* %a) #0 {
|
||||
; VBITS_GE_1024-LABEL: sdiv_v32i32:
|
||||
; VBITS_GE_1024: // %bb.0:
|
||||
; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
|
||||
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <32 x i32>, <32 x i32>* %a
|
||||
%res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
|
||||
store <32 x i32> %res, <32 x i32>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v64i32(<64 x i32>* %a) #0 {
|
||||
; VBITS_GE_2048-LABEL: sdiv_v64i32:
|
||||
; VBITS_GE_2048: // %bb.0:
|
||||
; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
|
||||
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: asrd z0.s, p0/m, z0.s, #5
|
||||
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <64 x i32>, <64 x i32>* %a
|
||||
%res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
|
||||
store <64 x i32> %res, <64 x i32>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v1i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
|
||||
ret <1 x i64> %res
|
||||
}
|
||||
|
||||
; Vector i64 sdiv is not legal for NEON, so use SVE when available.
|
||||
define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
|
||||
; CHECK-LABEL: sdiv_v2i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
%res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define void @sdiv_v4i64(<4 x i64>* %a) #0 {
|
||||
; CHECK-LABEL: sdiv_v4i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d, vl4
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%op1 = load <4 x i64>, <4 x i64>* %a
|
||||
%res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
|
||||
store <4 x i64> %res, <4 x i64>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v8i64(<8 x i64>* %a) #0 {
|
||||
; VBITS_EQ_256-LABEL: sdiv_v8i64:
|
||||
; VBITS_EQ_256: // %bb.0:
|
||||
; VBITS_EQ_256-NEXT: mov x8, #4
|
||||
; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
|
||||
; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
||||
; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
||||
; VBITS_EQ_256-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; VBITS_EQ_256-NEXT: asrd z1.d, p0/m, z1.d, #5
|
||||
; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
|
||||
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
|
||||
; VBITS_EQ_256-NEXT: ret
|
||||
;
|
||||
; VBITS_GE_512-LABEL: sdiv_v8i64:
|
||||
; VBITS_GE_512: // %bb.0:
|
||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
||||
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||
; VBITS_GE_512-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
|
||||
; VBITS_GE_512-NEXT: ret
|
||||
%op1 = load <8 x i64>, <8 x i64>* %a
|
||||
%res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)
|
||||
store <8 x i64> %res, <8 x i64>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v16i64(<16 x i64>* %a) #0 {
|
||||
; VBITS_GE_1024-LABEL: sdiv_v16i64:
|
||||
; VBITS_GE_1024: // %bb.0:
|
||||
; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
|
||||
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||
; VBITS_GE_1024-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
|
||||
; VBITS_GE_1024-NEXT: ret
|
||||
%op1 = load <16 x i64>, <16 x i64>* %a
|
||||
%res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
|
||||
store <16 x i64> %res, <16 x i64>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @sdiv_v32i64(<32 x i64>* %a) #0 {
|
||||
; VBITS_GE_2048-LABEL: sdiv_v32i64:
|
||||
; VBITS_GE_2048: // %bb.0:
|
||||
; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
|
||||
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||
; VBITS_GE_2048-NEXT: asrd z0.d, p0/m, z0.d, #5
|
||||
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
|
||||
; VBITS_GE_2048-NEXT: ret
|
||||
%op1 = load <32 x i64>, <32 x i64>* %a
|
||||
%res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
|
||||
store <32 x i64> %res, <32 x i64>* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
|
@ -0,0 +1,90 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #4
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 16, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
|
||||
ret <vscale x 16 x i8> %out
|
||||
}
|
||||
|
||||
define <vscale x 16 x i8> @sdiv_i8_neg(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i8_neg:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #6
|
||||
; CHECK-NEXT: subr z0.b, z0.b, #0 // =0x0
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -64, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
|
||||
ret <vscale x 16 x i8> %out
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #10
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1024, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
ret <vscale x 8 x i16> %out
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @sdiv_i16_neg(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i16_neg:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #12
|
||||
; CHECK-NEXT: subr z0.h, z0.h, #0 // =0x0
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -4096, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
|
||||
ret <vscale x 8 x i16> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #23
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i32_neg:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #25
|
||||
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -33554432, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
||||
ret <vscale x 4 x i32> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #53
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9007199254740992, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i64_neg:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #55
|
||||
; CHECK-NEXT: subr z0.d, z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: ret
|
||||
%out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -36028797018963968, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
|
||||
ret <vscale x 2 x i64> %out
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
Loading…
Reference in New Issue