[SVE][CodeGen] Add DAG combines for s/zext_masked_gather
This patch adds the following DAGCombines, which apply if isVectorLoadExtDesirable() returns true:

- fold (and (masked_gather x)) -> (zext_masked_gather x)
- fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)

LowerMGATHER has also been updated to fetch the LoadExtType associated with the gather and to use that value to select the correct masked gather opcode.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D92230
commit 05edfc5475
parent d568cff696
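As a minimal illustration of the pattern the new combines target (a hypothetical example in the spirit of the tests below, not a function taken from the patch): a masked gather of i8 elements whose result is zero-extended to i64 is built in the DAG as (and (masked_gather ...), 0xff), which the new visitAND fold should turn into a single zero-extending gather when the target reports the extending load as desirable.

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)

define <vscale x 2 x i64> @gather_zext_example(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  ; The zext below lowers to (and (masked_gather ...), 0xff) in the DAG; with
  ; this patch it should fold to a zext_masked_gather and select to a single
  ; ld1b on SVE, instead of ld1b followed by an explicit and.
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}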
@@ -932,6 +932,33 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
   return false;
 }
 
+static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
+  if (!ScalarTy.isSimple())
+    return false;
+
+  uint64_t MaskForTy = 0ULL;
+  switch (ScalarTy.getSimpleVT().SimpleTy) {
+  case MVT::i8:
+    MaskForTy = 0xFFULL;
+    break;
+  case MVT::i16:
+    MaskForTy = 0xFFFFULL;
+    break;
+  case MVT::i32:
+    MaskForTy = 0xFFFFFFFFULL;
+    break;
+  default:
+    return false;
+    break;
+  }
+
+  APInt Val;
+  if (ISD::isConstantSplatVector(N, Val))
+    return Val.getLimitedValue() == MaskForTy;
+
+  return false;
+}
+
 // Returns the SDNode if it is a constant float BuildVector
 // or constant float.
 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
@@ -5622,6 +5649,28 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     }
   }
 
+  // fold (and (masked_gather x)) -> (zext_masked_gather x)
+  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+    EVT MemVT = GN0->getMemoryVT();
+    EVT ScalarVT = MemVT.getScalarType();
+
+    if (SDValue(GN0, 0).hasOneUse() &&
+        isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
+        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+      SDValue ZExtLoad = DAG.getMaskedGather(
+          DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
+          GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+
+      CombineTo(N, ZExtLoad);
+      AddToWorklist(ZExtLoad.getNode());
+      // Avoid recheck of N.
+      return SDValue(N, 0);
+    }
+  }
+
   // fold (and (load x), 255) -> (zextload x, i8)
   // fold (and (extload x, i16), 255) -> (zextload x, i8)
   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
@@ -11597,6 +11646,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     }
   }
 
+  // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
+  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+    if (SDValue(GN0, 0).hasOneUse() &&
+        ExtVT == GN0->getMemoryVT() &&
+        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+      SDValue ExtLoad = DAG.getMaskedGather(
+          DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
+          GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
+
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+      AddToWorklist(ExtLoad.getNode());
+      return SDValue(N, 0); // Return N so it doesn't get rechecked!
+    }
+  }
+
   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -3836,6 +3836,26 @@ unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
   return AddrModes.find(Key)->second;
 }
 
+unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unimplemented opcode");
+    return Opcode;
+  case AArch64ISD::GLD1_MERGE_ZERO:
+    return AArch64ISD::GLD1S_MERGE_ZERO;
+  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+    return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
+  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+    return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
+  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+    return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
+  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+    return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
+  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+    return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
+  }
+}
+
 bool getGatherScatterIndexIsExtended(SDValue Index) {
   unsigned Opcode = Index.getOpcode();
   if (Opcode == ISD::SIGN_EXTEND_INREG)
@@ -3865,6 +3885,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
   SDValue PassThru = MGT->getPassThru();
   SDValue Mask = MGT->getMask();
   SDValue BasePtr = MGT->getBasePtr();
+  ISD::LoadExtType ExtTy = MGT->getExtensionType();
 
   ISD::MemIndexType IndexType = MGT->getIndexType();
   bool IsScaled =
@@ -3874,6 +3895,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
   bool IdxNeedsExtend =
       getGatherScatterIndexIsExtended(Index) ||
       Index.getSimpleValueType().getVectorElementType() == MVT::i32;
+  bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
 
   EVT VT = PassThru.getSimpleValueType();
   EVT MemVT = MGT->getMemoryVT();
@@ -3900,9 +3922,12 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
   if (getGatherScatterIndexIsExtended(Index))
     Index = Index.getOperand(0);
 
+  unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+  if (ResNeedsSignExtend)
+    Opcode = getSignExtendedGatherOpcode(Opcode);
+
   SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
-  return DAG.getNode(getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend), DL,
-                     VTs, Ops);
+  return DAG.getNode(Opcode, DL, VTs, Ops);
 }
 
 SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; unscaled unpacked 32-bit offsets
@@ -9,7 +10,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i16(i16* %base, <vscale x 2 x i32>
 ; CHECK-LABEL: masked_gather_nxv2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i32> %offsets
   %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
@@ -21,7 +21,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i32(i32* %base, <vscale x 2 x i32>
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i32> %offsets
   %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
@@ -72,9 +71,7 @@ define <vscale x 2 x double> @masked_gather_nxv2f64(double* %base, <vscale x 2 x
 define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i32> %offsets
   %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
@@ -85,9 +82,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i32>
 define <vscale x 2 x i64> @masked_sgather_nxv2i32(i32* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i32> %offsets
   %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
@@ -103,7 +98,6 @@ define <vscale x 4 x i32> @masked_gather_nxv4i16(i16* %base, <vscale x 4 x i32>
 ; CHECK-LABEL: masked_gather_nxv4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i32> %offsets
   %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
@@ -144,9 +138,7 @@ define <vscale x 4 x float> @masked_gather_nxv4f32(float* %base, <vscale x 4 x i
 define <vscale x 4 x i32> @masked_sgather_nxv4i16(i16* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i32> %offsets
   %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; unscaled unpacked 32-bit offsets
@@ -9,7 +10,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i8(i8* %base, <vscale x 2 x i32> %o
 ; CHECK-LABEL: masked_gather_nxv2i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: and z0.d, z0.d, #0xff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i32> %offsets
   %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
@@ -21,7 +21,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i16(i8* %base, <vscale x 2 x i32> %
 ; CHECK-LABEL: masked_gather_nxv2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i32> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -34,7 +33,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i32(i8* %base, <vscale x 2 x i32> %
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i32> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -90,9 +88,7 @@ define <vscale x 2 x double> @masked_gather_nxv2f64(i8* %base, <vscale x 2 x i32
 define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i32> %offsets
   %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
@@ -103,9 +99,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i32> %
 define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i32> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -117,9 +111,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i32>
 define <vscale x 2 x i64> @masked_sgather_nxv2i32(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i32> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -136,7 +128,6 @@ define <vscale x 4 x i32> @masked_gather_nxv4i8(i8* %base, <vscale x 4 x i32> %o
 ; CHECK-LABEL: masked_gather_nxv4i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: and z0.s, z0.s, #0xff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i8, i8* %base, <vscale x 4 x i32> %offsets
   %vals = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
@@ -148,7 +139,6 @@ define <vscale x 4 x i32> @masked_gather_nxv4i16(i8* %base, <vscale x 4 x i32> %
 ; CHECK-LABEL: masked_gather_nxv4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i32> %offsets
   %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i16*>
@@ -193,9 +183,7 @@ define <vscale x 4 x float> @masked_gather_nxv4f32(i8* %base, <vscale x 4 x i32>
 define <vscale x 4 x i32> @masked_sgather_nxv4i8(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i8, i8* %base, <vscale x 4 x i32> %offsets
   %vals = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
@@ -206,9 +194,7 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(i8* %base, <vscale x 4 x i32> %
 define <vscale x 4 x i32> @masked_sgather_nxv4i16(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i32> %offsets
   %ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i16*>
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; unscaled unpacked 32-bit offsets
@@ -9,7 +10,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i16(i16* %base, <vscale x 2 x i32>
 ; CHECK-LABEL: masked_gather_nxv2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets.zext
@@ -22,7 +22,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i32(i32* %base, <vscale x 2 x i32>
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets.zext
@@ -78,9 +77,7 @@ define <vscale x 2 x double> @masked_gather_nxv2f64(double* %base, <vscale x 2 x
 define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets.zext
@@ -92,9 +89,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i32>
 define <vscale x 2 x i64> @masked_sgather_nxv2i32(i32* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets.zext
@@ -111,7 +106,6 @@ define <vscale x 4 x i32> @masked_gather_nxv4i16(i16* %base, <vscale x 4 x i32>
 ; CHECK-LABEL: masked_gather_nxv4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
   %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %offsets.zext
@@ -156,9 +150,7 @@ define <vscale x 4 x float> @masked_gather_nxv4f32(float* %base, <vscale x 4 x i
 define <vscale x 4 x i32> @masked_sgather_nxv4i16(i16* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
   %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %offsets.zext
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; unscaled unpacked 32-bit offsets
@@ -9,7 +10,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i8(i8* %base, <vscale x 2 x i32> %o
 ; CHECK-LABEL: masked_gather_nxv2i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: and z0.d, z0.d, #0xff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
@@ -22,7 +22,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i16(i8* %base, <vscale x 2 x i32> %
 ; CHECK-LABEL: masked_gather_nxv2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
@@ -36,7 +35,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i32(i8* %base, <vscale x 2 x i32> %
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
@@ -97,9 +95,7 @@ define <vscale x 2 x double> @masked_gather_nxv2f64(i8* %base, <vscale x 2 x i32
 define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
@@ -111,9 +107,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i32> %
 define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
@@ -126,9 +120,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i32>
 define <vscale x 2 x i64> @masked_sgather_nxv2i32(i8* %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 2 x i32> %offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets.zext
@@ -146,7 +138,6 @@ define <vscale x 4 x i32> @masked_gather_nxv4i8(i8* %base, <vscale x 4 x i32> %o
 ; CHECK-LABEL: masked_gather_nxv4i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: and z0.s, z0.s, #0xff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
   %ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
@@ -159,7 +150,6 @@ define <vscale x 4 x i32> @masked_gather_nxv4i16(i8* %base, <vscale x 4 x i32> %
 ; CHECK-LABEL: masked_gather_nxv4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
@@ -208,9 +198,7 @@ define <vscale x 4 x float> @masked_gather_nxv4f32(i8* %base, <vscale x 4 x i32>
 define <vscale x 4 x i32> @masked_sgather_nxv4i8(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
   %ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
@@ -222,9 +210,7 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(i8* %base, <vscale x 4 x i32> %
 define <vscale x 4 x i32> @masked_sgather_nxv4i16(i8* %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
 ; CHECK-NEXT: ret
   %offsets.zext = zext <vscale x 4 x i32> %offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets.zext
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
 define <vscale x 2 x i64> @masked_gather_nxv2i16(i16* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_gather_nxv2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets
   %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
@@ -17,7 +17,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i32(i32* %base, <vscale x 2 x i64>
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets
   %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
@@ -68,9 +67,7 @@ define <vscale x 2 x double> @masked_gather_nxv2f64(double* %base, <vscale x 2 x
 define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets
   %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
@@ -81,9 +78,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i16(i16* %base, <vscale x 2 x i64>
 define <vscale x 2 x i64> @masked_sgather_nxv2i32(i32* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets
   %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
 define <vscale x 2 x i64> @masked_gather_nxv2i8(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_gather_nxv2i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: and z0.d, z0.d, #0xff
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
@@ -17,7 +17,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i16(i8* %base, <vscale x 2 x i64> %
 ; CHECK-LABEL: masked_gather_nxv2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -30,7 +29,6 @@ define <vscale x 2 x i64> @masked_gather_nxv2i32(i8* %base, <vscale x 2 x i64> %
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -86,9 +84,7 @@ define <vscale x 2 x double> @masked_gather_nxv2f64(i8* %base, <vscale x 2 x i64
 define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT: ret
   %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
@@ -99,9 +95,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i8(i8* %base, <vscale x 2 x i64> %
 define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -113,9 +107,7 @@ define <vscale x 2 x i64> @masked_sgather_nxv2i16(i8* %base, <vscale x 2 x i64>
 define <vscale x 2 x i64> @masked_sgather_nxv2i32(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT: ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -1,5 +1,46 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s
 
+; Test for multiple uses of the mgather where the s/zext should not be combined
+
+define <vscale x 2 x i64> @masked_sgather_sext(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask, <vscale x 2 x i8> %vals) {
+; CHECK-LABEL: masked_sgather_sext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtb z2.d, p0/m, z0.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
+  %data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %data.sext = sext <vscale x 2 x i8> %data to <vscale x 2 x i64>
+  %add = add <vscale x 2 x i8> %data, %vals
+  %add.sext = sext <vscale x 2 x i8> %add to <vscale x 2 x i64>
+  %mul = mul <vscale x 2 x i64> %data.sext, %add.sext
+  ret <vscale x 2 x i64> %mul
+}
+
+define <vscale x 2 x i64> @masked_sgather_zext(i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask, <vscale x 2 x i8> %vals) {
+; CHECK-LABEL: masked_sgather_zext:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add z1.d, z0.d, z1.d
+; CHECK-NEXT: and z0.d, z0.d, #0xff
+; CHECK-NEXT: and z1.d, z1.d, #0xff
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
+  %data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  %data.zext = zext <vscale x 2 x i8> %data to <vscale x 2 x i64>
+  %add = add <vscale x 2 x i8> %data, %vals
+  %add.zext = zext <vscale x 2 x i8> %add to <vscale x 2 x i64>
+  %mul = mul <vscale x 2 x i64> %data.zext, %add.zext
+  ret <vscale x 2 x i64> %mul
+}
+
 ; Tests that exercise various type legalisation scenarios for ISD::MGATHER.
 
@@ -7,7 +48,7 @@
 define <vscale x 2 x i32> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_gather_nxv2i32:
 ; CHECK-DAG: mov x8, xzr
-; CHECK-DAG: ld1w { z0.d }, p0/z, [x8, z0.d]
+; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d]
 ; CHECK: ret
   %data = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
   ret <vscale x 2 x i32> %data
@@ -41,8 +82,8 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vsca
 ; CHECK-NEXT: mov x8, xzr
 ; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
 ; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ld1b { z1.d }, p2/z, [x8, z1.d]
-; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x8, z1.d]
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s