forked from OSchip/llvm-project
[SystemZ] Patchset for expanding memcpy/memset using at most two stores.
* Set MaxStoresPerMemcpy and MaxStoresPerMemset to 2.
* Optimize stores of replicated values in SystemZ::combineSTORE(). This handles the now expanded memory operations as well as some other pre-existing cases.
* Reject a big displacement in isLegalAddressingMode() for a vector type.
* Return true from shouldConsiderGEPOffsetSplit().
Reviewed By: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D122105
This commit is contained in:
parent
afc21c7e79
commit
eaa78035c6
|
@ -3486,7 +3486,8 @@ public:
|
|||
/// Return true if the number of memory ops is below the threshold (Limit).
|
||||
/// It returns the types of the sequence of memory ops to perform
|
||||
/// memset / memcpy by reference.
|
||||
bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
|
||||
virtual bool
|
||||
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
|
||||
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
|
||||
const AttributeList &FuncAttributes) const;
|
||||
|
||||
|
|
|
@ -669,7 +669,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
// We want to use MVC in preference to even a single load/store pair.
|
||||
MaxStoresPerMemcpy = 0;
|
||||
MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
|
||||
MaxStoresPerMemcpyOptSize = 0;
|
||||
|
||||
// The main memset sequence is a byte store followed by an MVC.
|
||||
|
@ -677,7 +677,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
|
|||
// generated by target-independent code don't when the byte value is
|
||||
// variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
|
||||
// than "STC;MVC". Handle the choice in target-specific code instead.
|
||||
MaxStoresPerMemset = 0;
|
||||
MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
|
||||
MaxStoresPerMemsetOptSize = 0;
|
||||
|
||||
// Default to having -disable-strictnode-mutation on
|
||||
|
@ -793,14 +793,17 @@ bool SystemZVectorConstantInfo::isVectorConstantLegal(
|
|||
return tryValue(SplatBitsZ | Middle);
|
||||
}
|
||||
|
||||
SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
|
||||
IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
|
||||
isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
|
||||
SplatBits = FPImm.bitcastToAPInt();
|
||||
unsigned Width = SplatBits.getBitWidth();
|
||||
IntBits <<= (SystemZ::VectorBits - Width);
|
||||
SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
|
||||
if (IntImm.isSingleWord()) {
|
||||
IntBits = APInt(128, IntImm.getZExtValue());
|
||||
IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
|
||||
} else
|
||||
IntBits = IntImm;
|
||||
assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
|
||||
|
||||
// Find the smallest splat.
|
||||
SplatBits = IntImm;
|
||||
unsigned Width = SplatBits.getBitWidth();
|
||||
while (Width > 8) {
|
||||
unsigned HalfSize = Width / 2;
|
||||
APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
|
||||
|
@ -976,7 +979,8 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
|||
if (!isInt<20>(AM.BaseOffs))
|
||||
return false;
|
||||
|
||||
AddressingMode SupportedAM(true, true);
|
||||
bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
|
||||
AddressingMode SupportedAM(!RequireD12, true);
|
||||
if (I != nullptr)
|
||||
SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
|
||||
|
||||
|
@ -991,6 +995,28 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
|
|||
return AM.Scale == 0 || AM.Scale == 1;
|
||||
}
|
||||
|
||||
/// SystemZ override of the generic memory-op lowering query.
///
/// Returns false for the three cases listed in the body, where the inline
/// comments name MVC or XC as the preferred instruction. In every other case
/// all arguments are forwarded unchanged to
/// TargetLowering::findOptimalMemOpLowering and its result is returned.
///
/// NOTE(review): presumably a false return makes the caller skip the
/// load/store expansion so that the MVC/XC lowering is used instead - confirm
/// against the caller.
bool SystemZTargetLowering::findOptimalMemOpLowering(
|
||||
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
|
||||
unsigned SrcAS, const AttributeList &FuncAttributes) const {
|
||||
// Largest length that is still left to MVC by the checks below.
const int MVCFastLen = 16;
|
||||
|
||||
// Don't expand Op into scalar loads/stores in these cases:
|
||||
if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
|
||||
return false; // Small memcpy: Use MVC
|
||||
// The "- 1" accounts for the first byte, which (per the comment on the
// return below) is stored separately with STC/MVI before the MVC.
// NOTE(review): if Op.size() is unsigned and can be 0 here, the subtraction
// would wrap - confirm that a zero-length memset never reaches this point.
if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
|
||||
return false; // Small memset (first byte with STC/MVI): Use MVC
|
||||
if (Op.isZeroMemset())
|
||||
return false; // Memset zero: Use XC
|
||||
|
||||
// Anything else: let the target-independent implementation decide.
return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
|
||||
SrcAS, FuncAttributes);
|
||||
}
|
||||
|
||||
/// Returns the value type to use for an expanded memory operation:
/// MVT::v2i64 when the subtarget reports vector support, MVT::Other otherwise.
/// Neither \p Op nor \p FuncAttributes is inspected.
/// NOTE(review): MVT::Other presumably tells the generic code that the target
/// has no preference - confirm against the TargetLowering documentation.
EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
|
||||
const AttributeList &FuncAttributes) const {
|
||||
return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
|
||||
}
|
||||
|
||||
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
|
||||
if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
|
||||
return false;
|
||||
|
@ -6329,6 +6355,23 @@ static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Returns true if every user of \p StoredVal is acceptable, where a user is
/// acceptable when it is either
///  - a store whose scalar memory type satisfies isRound() and has a store
///    size of at most 16, or
///  - a BUILD_VECTOR that is a splat (undef elements allowed) and whose own
///    users all pass this same check (checked recursively).
/// Any other user makes the result false. A value with no users yields true,
/// since the loop body never runs.
/// NOTE(review): a store user is accepted without checking which operand of
/// the store \p StoredVal is - confirm this is intended.
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
|
||||
for (auto *U : StoredVal->uses()) {
|
||||
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
|
||||
// Only the element type matters here, so vector stores are judged by
// their scalar type.
EVT CurrMemVT = ST->getMemoryVT().getScalarType();
|
||||
if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
|
||||
continue;
|
||||
} else if (isa<BuildVectorSDNode>(U)) {
|
||||
// Look through a splat BUILD_VECTOR to the users of the vector itself.
SDValue BuildVector = SDValue(U, 0);
|
||||
if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
|
||||
isOnlyUsedByStores(BuildVector, DAG))
|
||||
continue;
|
||||
}
|
||||
// Reached for any user that did not hit one of the 'continue's above.
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
SDValue SystemZTargetLowering::combineSTORE(
|
||||
SDNode *N, DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
|
@ -6387,6 +6430,82 @@ SDValue SystemZTargetLowering::combineSTORE(
|
|||
}
|
||||
}
|
||||
|
||||
// Replicate a reg or immediate with VREP instead of scalar multiply or
|
||||
// immediate load. It seems best to do this during the first DAGCombine as
|
||||
// it is straight-forward to handle the zero-extend node in the initial
|
||||
// DAG, and also not worry about keeping the new MemVT legal (e.g. when
|
||||
// extracting an i16 element from a v16i8 vector).
|
||||
if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
|
||||
isOnlyUsedByStores(Op1, DAG)) {
|
||||
SDValue Word = SDValue();
|
||||
EVT WordVT;
|
||||
|
||||
// Find a replicated immediate and return it if found in Word and its
|
||||
// type in WordVT.
|
||||
auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
|
||||
// Some constants are better handled with a scalar store.
|
||||
if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
|
||||
isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
|
||||
return;
|
||||
SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
|
||||
if (VCI.isVectorConstantLegal(Subtarget) &&
|
||||
VCI.Opcode == SystemZISD::REPLICATE) {
|
||||
Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
|
||||
WordVT = VCI.VecVT.getScalarType();
|
||||
}
|
||||
};
|
||||
|
||||
// Find a replicated register and return it if found in Word and its type
|
||||
// in WordVT.
|
||||
auto FindReplicatedReg = [&](SDValue MulOp) {
|
||||
EVT MulVT = MulOp.getValueType();
|
||||
if (MulOp->getOpcode() == ISD::MUL &&
|
||||
(MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
|
||||
// Find a zero extended value and its type.
|
||||
SDValue LHS = MulOp->getOperand(0);
|
||||
if (LHS->getOpcode() == ISD::ZERO_EXTEND)
|
||||
WordVT = LHS->getOperand(0).getValueType();
|
||||
else if (LHS->getOpcode() == ISD::AssertZext)
|
||||
WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
|
||||
else
|
||||
return;
|
||||
// Find a replicating constant, e.g. 0x00010001.
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
|
||||
SystemZVectorConstantInfo VCI(
|
||||
APInt(MulVT.getSizeInBits(), C->getZExtValue()));
|
||||
if (VCI.isVectorConstantLegal(Subtarget) &&
|
||||
VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
|
||||
WordVT == VCI.VecVT.getScalarType())
|
||||
Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (isa<BuildVectorSDNode>(Op1) &&
|
||||
DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
|
||||
SDValue SplatVal = Op1->getOperand(0);
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
|
||||
FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
|
||||
else
|
||||
FindReplicatedReg(SplatVal);
|
||||
} else {
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(Op1))
|
||||
FindReplicatedImm(C, MemVT.getStoreSize());
|
||||
else
|
||||
FindReplicatedReg(Op1);
|
||||
}
|
||||
|
||||
if (Word != SDValue()) {
|
||||
assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
|
||||
"Bad type handling");
|
||||
unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
|
||||
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
|
||||
SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
|
||||
return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
|
||||
SN->getBasePtr(), SN->getMemOperand());
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -457,6 +457,12 @@ public:
|
|||
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
|
||||
MachineMemOperand::Flags Flags,
|
||||
bool *Fast) const override;
|
||||
bool
|
||||
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
|
||||
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
|
||||
const AttributeList &FuncAttributes) const override;
|
||||
EVT getOptimalMemOpType(const MemOp &Op,
|
||||
const AttributeList &FuncAttributes) const override;
|
||||
bool isTruncateFree(Type *, Type *) const override;
|
||||
bool isTruncateFree(EVT, EVT) const override;
|
||||
|
||||
|
@ -467,6 +473,8 @@ public:
|
|||
return VT == MVT::i32 || VT == MVT::i64;
|
||||
}
|
||||
|
||||
bool shouldConsiderGEPOffsetSplit() const override { return true; }
|
||||
|
||||
const char *getTargetNodeName(unsigned Opcode) const override;
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
|
@ -767,12 +775,15 @@ private:
|
|||
APInt SplatUndef; // Bits corresponding to undef operands of the BVN.
|
||||
unsigned SplatBitSize = 0;
|
||||
bool isFP128 = false;
|
||||
|
||||
public:
|
||||
unsigned Opcode = 0;
|
||||
SmallVector<unsigned, 2> OpVals;
|
||||
MVT VecVT;
|
||||
SystemZVectorConstantInfo(APFloat FPImm);
|
||||
SystemZVectorConstantInfo(APInt IntImm);
|
||||
SystemZVectorConstantInfo(APFloat FPImm)
|
||||
: SystemZVectorConstantInfo(FPImm.bitcastToAPInt()) {
|
||||
isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
|
||||
}
|
||||
SystemZVectorConstantInfo(BuildVectorSDNode *BVN);
|
||||
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget);
|
||||
};
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
|
||||
;
|
||||
; Test that the big offsets are handled by only one AGFI.
|
||||
|
||||
define void @fun(i64* %Src, i64* %Dst) {
|
||||
; CHECK-LABEL: fun:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: agfi %r2, 1048576
|
||||
; CHECK-NEXT: lg %r0, 0(%r2)
|
||||
; CHECK-NEXT: stg %r0, 0(%r3)
|
||||
; CHECK-NEXT: lg %r0, 8(%r2)
|
||||
; CHECK-NEXT: stg %r0, 0(%r3)
|
||||
; CHECK-NEXT: br %r14
|
||||
%S0 = getelementptr i64, i64* %Src, i64 131072
|
||||
%V0 = load i64, i64* %S0
|
||||
store volatile i64 %V0, i64* %Dst
|
||||
|
||||
%S1 = getelementptr i64, i64* %Src, i64 131073
|
||||
%V1 = load i64, i64* %S1
|
||||
store volatile i64 %V1, i64* %Dst
|
||||
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
|
||||
;
|
||||
; Test that DAGCombiner does not change the addressing as the displacements
|
||||
; are known to be out of range. Only one addition is needed.
|
||||
|
||||
define void @fun(<2 x i64>* %Src, <2 x i64>* %Dst) {
|
||||
; CHECK-LABEL: fun:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: aghi %r2, 4096
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 3
|
||||
; CHECK-NEXT: vl %v0, 16(%r2), 3
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
%1 = bitcast <2 x i64>* %Src to i8*
|
||||
|
||||
%splitgep = getelementptr i8, i8* %1, i64 4096
|
||||
%2 = bitcast i8* %splitgep to <2 x i64>*
|
||||
%V0 = load <2 x i64>, <2 x i64>* %2, align 8
|
||||
store volatile <2 x i64> %V0, <2 x i64>* %Dst, align 8
|
||||
|
||||
%3 = getelementptr i8, i8* %splitgep, i64 16
|
||||
%4 = bitcast i8* %3 to <2 x i64>*
|
||||
%V1 = load <2 x i64>, <2 x i64>* %4, align 8
|
||||
store volatile <2 x i64> %V1, <2 x i64>* %Dst, align 8
|
||||
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,217 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mcpu=z15 < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
;
|
||||
; Test memcpys of small constant lengths that should not be done with MVC.
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i1) nounwind
|
||||
|
||||
define void @fun16(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvc 0(16,%r3), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 16, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun17(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun17:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lb %r0, 16(%r2)
|
||||
; CHECK-NEXT: stc %r0, 16(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 17, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun18(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun18:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lh %r0, 16(%r2)
|
||||
; CHECK-NEXT: sth %r0, 16(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 18, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun19(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun19:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: l %r0, 15(%r2)
|
||||
; CHECK-NEXT: st %r0, 15(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 19, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun20(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun20:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: l %r0, 16(%r2)
|
||||
; CHECK-NEXT: st %r0, 16(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 20, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun21(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun21:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lg %r0, 13(%r2)
|
||||
; CHECK-NEXT: stg %r0, 13(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 21, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun22(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun22:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lg %r0, 14(%r2)
|
||||
; CHECK-NEXT: stg %r0, 14(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 22, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun23(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun23:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lg %r0, 15(%r2)
|
||||
; CHECK-NEXT: stg %r0, 15(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 23, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun24(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun24:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lg %r0, 16(%r2)
|
||||
; CHECK-NEXT: stg %r0, 16(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 24, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun25(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun25:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 9(%r2)
|
||||
; CHECK-NEXT: vst %v0, 9(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 25, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun26(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun26:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 10(%r2)
|
||||
; CHECK-NEXT: vst %v0, 10(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 26, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun27(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun27:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 11(%r2)
|
||||
; CHECK-NEXT: vst %v0, 11(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 27, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun28(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun28:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 12(%r2)
|
||||
; CHECK-NEXT: vst %v0, 12(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 28, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun29(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun29:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 13(%r2)
|
||||
; CHECK-NEXT: vst %v0, 13(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 29, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun30(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun30:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 14(%r2)
|
||||
; CHECK-NEXT: vst %v0, 14(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 30, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun31(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun31:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 15(%r2)
|
||||
; CHECK-NEXT: vst %v0, 15(%r3)
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 31, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun32(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vl %v0, 16(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 16(%r3), 4
|
||||
; CHECK-NEXT: vl %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun33(i8* %Src, i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: fun33:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvc 0(33,%r3), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %Dst, i8* align 16 %Src, i64 33, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,420 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mcpu=z15 %s -mtriple=s390x-linux-gnu -o - | FileCheck %s
|
||||
;
|
||||
; Test memsets of small constant lengths that should not be done with MVC.
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
|
||||
|
||||
define void @reg17(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg17:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: stc %r3, 0(%r2)
|
||||
; CHECK-NEXT: mvc 1(16,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 17, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg18(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg18:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: vsteh %v0, 16(%r2), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 18, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg19(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg19:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vstef %v0, 15(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 19, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg20(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg20:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vstef %v0, 16(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 20, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg21(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg21:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vsteg %v0, 13(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 21, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg22(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg22:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vsteg %v0, 14(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 22, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg23(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg23:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vsteg %v0, 15(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 23, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg24(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg24:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vsteg %v0, 16(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 24, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg25(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg25:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 9(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 25, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg26(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg26:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 10(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 26, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg27(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg27:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 11(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 27, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg28(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg28:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 12(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 28, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg29(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg29:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 13(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 29, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg30(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg30:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 14(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 30, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg31(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg31:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 15(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 31, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg32(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r3, %r3
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vst %v0, 16(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @reg33(i8* %Dst, i8 %val) {
|
||||
; CHECK-LABEL: reg33:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: stc %r3, 0(%r2)
|
||||
; CHECK-NEXT: mvc 1(32,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 %val, i64 33, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Immediate value
|
||||
|
||||
define void @imm17(i8* %Dst) {
|
||||
; CHECK-LABEL: imm17:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvi 0(%r2), 1
|
||||
; CHECK-NEXT: mvc 1(16,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 17, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm18(i8* %Dst) {
|
||||
; CHECK-LABEL: imm18:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: mvhhi 16(%r2), -1
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 18, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @zero18(i8* %Dst) {
|
||||
; CHECK-LABEL: zero18:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xc 0(18,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 0, i64 18, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm19(i8* %Dst) {
|
||||
; CHECK-LABEL: imm19:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vstef %v0, 15(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 19, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm20(i8* %Dst) {
|
||||
; CHECK-LABEL: imm20:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: mvhi 16(%r2), -1
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 20, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm21(i8* %Dst) {
|
||||
; CHECK-LABEL: imm21:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vsteg %v0, 13(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 21, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm22(i8* %Dst) {
|
||||
; CHECK-LABEL: imm22:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: mvghi 14(%r2), -1
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 22, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm23(i8* %Dst) {
|
||||
; CHECK-LABEL: imm23:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vsteg %v0, 15(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 23, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm24(i8* %Dst) {
|
||||
; CHECK-LABEL: imm24:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: mvghi 16(%r2), -1
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 24, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm25(i8* %Dst) {
|
||||
; CHECK-LABEL: imm25:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vst %v0, 9(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 25, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm26(i8* %Dst) {
|
||||
; CHECK-LABEL: imm26:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 10(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 26, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @zero26(i8* %Dst) {
|
||||
; CHECK-LABEL: zero26:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xc 0(26,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 0, i64 26, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm27(i8* %Dst) {
|
||||
; CHECK-LABEL: imm27:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vst %v0, 11(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 27, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm28(i8* %Dst) {
|
||||
; CHECK-LABEL: imm28:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 12(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 28, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm29(i8* %Dst) {
|
||||
; CHECK-LABEL: imm29:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vst %v0, 13(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 29, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm30(i8* %Dst) {
|
||||
; CHECK-LABEL: imm30:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 14(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 30, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm31(i8* %Dst) {
|
||||
; CHECK-LABEL: imm31:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vst %v0, 15(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 31, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm32(i8* %Dst) {
|
||||
; CHECK-LABEL: imm32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vgbm %v0, 65535
|
||||
; CHECK-NEXT: vst %v0, 16(%r2), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 -1, i64 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @zero32(i8* %Dst) {
|
||||
; CHECK-LABEL: zero32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xc 0(32,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 0, i64 32, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @imm33(i8* %Dst) {
|
||||
; CHECK-LABEL: imm33:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvi 0(%r2), 1
|
||||
; CHECK-NEXT: mvc 1(32,%r2), 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memset.p0i8.i64(i8* align 16 %Dst, i8 1, i64 33, i1 false)
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,380 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
|
||||
;
|
||||
; Test storing of replicated values using vector replicate type instructions.
|
||||
|
||||
;; Replicated registers
|
||||
|
||||
; Loads an i8, zero-extends it to i16 and multiplies by 257 (0x0101), which
; copies the byte into both bytes of the i16 before it is stored.
; Expected codegen: vlrepb loads and replicates the byte directly from memory
; and vsteh stores halfword element 0 of the resulting vector.
define void @fun_2x1b(i8* %Src, i16* %Dst) {
|
||||
; CHECK-LABEL: fun_2x1b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepb %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteh %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i8, i8* %Src
|
||||
%ZE = zext i8 %i to i16
|
||||
%Val = mul i16 %ZE, 257
|
||||
store i16 %Val, i16* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test multiple stores of the same value: a loaded byte is replicated into an
; i32 (multiply by 16843009 = 0x01010101) and stored to two destinations.
; Expected codegen: a single vlrepb feeds both vstef stores.
|
||||
define void @fun_4x1b(i8* %Src, i32* %Dst, i32* %Dst2) {
|
||||
; CHECK-LABEL: fun_4x1b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepb %v0, 0(%r2)
|
||||
; CHECK-NEXT: vstef %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: vstef %v0, 0(%r4), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i8, i8* %Src
|
||||
%ZE = zext i8 %i to i32
|
||||
%Val = mul i32 %ZE, 16843009
|
||||
store i32 %Val, i32* %Dst
|
||||
store i32 %Val, i32* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; A loaded byte is replicated into all eight bytes of an i64 (multiply by
; 72340172838076673 = 0x0101010101010101) and stored.
; Expected codegen: vlrepb followed by vsteg of doubleword element 0.
define void @fun_8x1b(i8* %Src, i64* %Dst) {
|
||||
; CHECK-LABEL: fun_8x1b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepb %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i8, i8* %Src
|
||||
%ZE = zext i8 %i to i64
|
||||
%Val = mul i64 %ZE, 72340172838076673
|
||||
store i64 %Val, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; A second, truncated store of the same value: the byte-replicated i64 is
; stored in full and then truncated to i32 and stored to a second destination.
; Expected codegen: both stores (vsteg, then vstef) read the same replicated
; vector produced by a single vlrepb.
|
||||
define void @fun_8x1b_4x1b(i8* %Src, i64* %Dst, i32* %Dst2) {
|
||||
; CHECK-LABEL: fun_8x1b_4x1b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepb %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: vstef %v0, 0(%r4), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i8, i8* %Src
|
||||
%ZE = zext i8 %i to i64
|
||||
%Val = mul i64 %ZE, 72340172838076673
|
||||
store i64 %Val, i64* %Dst
|
||||
%TrVal = trunc i64 %Val to i32
|
||||
store i32 %TrVal, i32* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; A loaded i16 is replicated into both halfwords of an i32 (multiply by
; 65537 = 0x00010001) and stored.
; Expected codegen: vlreph followed by vstef of word element 0.
define void @fun_2x2b(i16* %Src, i32* %Dst) {
|
||||
; CHECK-LABEL: fun_2x2b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlreph %v0, 0(%r2)
|
||||
; CHECK-NEXT: vstef %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i16, i16* %Src
|
||||
%ZE = zext i16 %i to i32
|
||||
%Val = mul i32 %ZE, 65537
|
||||
store i32 %Val, i32* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; A loaded i16 is replicated into all four halfwords of an i64 (multiply by
; 281479271743489 = 0x0001000100010001) and stored.
; Expected codegen: vlreph followed by vsteg of doubleword element 0.
define void @fun_4x2b(i16* %Src, i64* %Dst) {
|
||||
; CHECK-LABEL: fun_4x2b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlreph %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i16, i16* %Src
|
||||
%ZE = zext i16 %i to i64
|
||||
%Val = mul i64 %ZE, 281479271743489
|
||||
store i64 %Val, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; A loaded i32 is replicated into both words of an i64 (multiply by
; 4294967297 = 0x0000000100000001) and stored.
; Expected codegen: vlrepf followed by vsteg of doubleword element 0.
define void @fun_2x4b(i32* %Src, i64* %Dst) {
|
||||
; CHECK-LABEL: fun_2x4b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepf %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i32, i32* %Src
|
||||
%ZE = zext i32 %i to i64
|
||||
%Val = mul i64 %ZE, 4294967297
|
||||
store i64 %Val, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Replicated registers already in a vector.
|
||||
|
||||
; Test multiple stores of the same value: a byte-replicated i64 (multiply by
; 0x0101010101010101) is splatted into a <2 x i64> and stored to two
; destinations. Expected codegen: one vlrepb feeding two full-vector vst stores.
|
||||
define void @fun_2Eltsx8x1b(i8* %Src, <2 x i64>* %Dst, <2 x i64>* %Dst2) {
|
||||
; CHECK-LABEL: fun_2Eltsx8x1b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepb %v0, 0(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 3
|
||||
; CHECK-NEXT: vst %v0, 0(%r4), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i8, i8* %Src
|
||||
%ZE = zext i8 %i to i64
|
||||
%Mul = mul i64 %ZE, 72340172838076673
|
||||
%tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
|
||||
%Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
store <2 x i64> %Val, <2 x i64>* %Dst
|
||||
store <2 x i64> %Val, <2 x i64>* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; A halfword-replicated i32 (multiply by 65537 = 0x00010001) is splatted into
; a <4 x i32> and stored.
; Expected codegen: vlreph followed by a single full-vector vst.
define void @fun_4Eltsx2x2b(i16* %Src, <4 x i32>* %Dst) {
|
||||
; CHECK-LABEL: fun_4Eltsx2x2b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlreph %v0, 0(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i16, i16* %Src
|
||||
%ZE = zext i16 %i to i32
|
||||
%Mul = mul i32 %ZE, 65537
|
||||
%tmp = insertelement <4 x i32> undef, i32 %Mul, i32 0
|
||||
%Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
store <4 x i32> %Val, <4 x i32>* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same replication as the <4 x i32> case but with a non-power-of-two
; <6 x i32> (24 bytes) splat.
; Expected codegen: one vlreph, then the 24 bytes are written as an 8-byte
; vsteg at offset 16 plus a 16-byte vst at offset 0.
define void @fun_6Eltsx2x2b(i16* %Src, <6 x i32>* %Dst) {
|
||||
; CHECK-LABEL: fun_6Eltsx2x2b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlreph %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteg %v0, 16(%r3), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i16, i16* %Src
|
||||
%ZE = zext i16 %i to i32
|
||||
%Mul = mul i32 %ZE, 65537
|
||||
%tmp = insertelement <6 x i32> undef, i32 %Mul, i32 0
|
||||
%Val = shufflevector <6 x i32> %tmp, <6 x i32> undef, <6 x i32> zeroinitializer
|
||||
store <6 x i32> %Val, <6 x i32>* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; A word-replicated i64 (multiply by 4294967297 = 0x0000000100000001) is
; splatted into a <2 x i64> and stored.
; Expected codegen: vlrepf followed by a single full-vector vst.
define void @fun_2Eltsx2x4b(i32* %Src, <2 x i64>* %Dst) {
|
||||
; CHECK-LABEL: fun_2Eltsx2x4b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepf %v0, 0(%r2)
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i32, i32* %Src
|
||||
%ZE = zext i32 %i to i64
|
||||
%Mul = mul i64 %ZE, 4294967297
|
||||
%tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
|
||||
%Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
store <2 x i64> %Val, <2 x i64>* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Word-replicated i64 splatted into a non-power-of-two <5 x i64> (40 bytes).
; Expected codegen: one vlrepf, then the 40 bytes are written as an 8-byte
; vsteg at offset 32 plus two 16-byte vst stores at offsets 16 and 0.
define void @fun_5Eltsx2x4b(i32* %Src, <5 x i64>* %Dst) {
|
||||
; CHECK-LABEL: fun_5Eltsx2x4b:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlrepf %v0, 0(%r2)
|
||||
; CHECK-NEXT: vsteg %v0, 32(%r3), 0
|
||||
; CHECK-NEXT: vst %v0, 16(%r3), 4
|
||||
; CHECK-NEXT: vst %v0, 0(%r3), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i32, i32* %Src
|
||||
%ZE = zext i32 %i to i64
|
||||
%Mul = mul i64 %ZE, 4294967297
|
||||
%tmp = insertelement <5 x i64> undef, i64 %Mul, i32 0
|
||||
%Val = shufflevector <5 x i64> %tmp, <5 x i64> undef, <5 x i32> zeroinitializer
|
||||
store <5 x i64> %Val, <5 x i64>* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test replicating an incoming argument: the i8 arrives in a register rather
; than being loaded, so no load-and-replicate instruction can be used.
; Expected codegen: vlvgp moves the GPR into a vector register, vrepb
; replicates byte element 7 of it, and vsteg stores doubleword element 0.
|
||||
define void @fun_8x1b_arg(i8 %Arg, i64* %Dst) {
|
||||
; CHECK-LABEL: fun_8x1b_arg:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vlvgp %v0, %r2, %r2
|
||||
; CHECK-NEXT: vrepb %v0, %v0, 7
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%ZE = zext i8 %Arg to i64
|
||||
%Val = mul i64 %ZE, 72340172838076673
|
||||
store i64 %Val, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; A replication of a non-local value (ISD::AssertZext case): the i32 is loaded
; in the entry block, but the zext, the replicating multiply, the splat and the
; store all live in %bb2. The test uses undef pointers and an undef branch
; condition, so only the shape of the %bb2 sequence is of interest.
; Expected codegen in %bb2: llgf, vlvgp, vrepf of word element 1, then vst.
|
||||
define void @fun_nonlocalval() {
|
||||
; CHECK-LABEL: fun_nonlocalval:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lhi %r0, 0
|
||||
; CHECK-NEXT: ciblh %r0, 0, 0(%r14)
|
||||
; CHECK-NEXT: .LBB13_1: # %bb2
|
||||
; CHECK-NEXT: llgf %r0, 0(%r1)
|
||||
; CHECK-NEXT: vlvgp %v0, %r0, %r0
|
||||
; CHECK-NEXT: vrepf %v0, %v0, 1
|
||||
; CHECK-NEXT: vst %v0, 0(%r1), 3
|
||||
; CHECK-NEXT: br %r14
|
||||
%i = load i32, i32* undef, align 4
|
||||
br i1 undef, label %bb2, label %bb7
|
||||
|
||||
bb2: ; preds = the unnamed entry block (there is no %bb1 in this function)
|
||||
%i3 = zext i32 %i to i64
|
||||
%i4 = mul nuw i64 %i3, 4294967297
|
||||
%i5 = insertelement <2 x i64> poison, i64 %i4, i64 0
|
||||
%i6 = shufflevector <2 x i64> %i5, <2 x i64> poison, <2 x i32> zeroinitializer
|
||||
store <2 x i64> %i6, <2 x i64>* undef, align 8
|
||||
ret void
|
||||
|
||||
bb7:
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Replicated immediates
|
||||
|
||||
; Some cases where a scalar instruction is better.
|
||||
; Stores the i64 constant 0. Although zero is trivially a replicated value,
; the expected codegen is the single scalar store-immediate mvghi, with no
; vector register involved.
define void @fun_8x1i_zero(i64* %Dst) {
|
||||
; CHECK-LABEL: fun_8x1i_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvghi 0(%r2), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
store i64 0, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i32 constant -1 (all bytes 0xff). Expected codegen is the single
; scalar store-immediate mvhi rather than a vector replicate plus store.
define void @fun_4x1i_minus1(i32* %Dst) {
|
||||
; CHECK-LABEL: fun_4x1i_minus1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvhi 0(%r2), -1
|
||||
; CHECK-NEXT: br %r14
|
||||
store i32 -1, i32* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same bit pattern as the -1 case, but written as the unsigned literal
; 4294967295 (0xffffffff). Expected codegen is still the scalar mvhi with the
; immediate -1.
define void @fun_4x1i_allones(i32* %Dst) {
|
||||
; CHECK-LABEL: fun_4x1i_allones:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvhi 0(%r2), -1
|
||||
; CHECK-NEXT: br %r14
|
||||
store i32 4294967295, i32* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i16 constant 1, which is not a replicated byte pattern.
; Expected codegen is the scalar store-immediate mvhhi.
define void @fun_2i(i16* %Dst) {
|
||||
; CHECK-LABEL: fun_2i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mvhhi 0(%r2), 1
|
||||
; CHECK-NEXT: br %r14
|
||||
store i16 1, i16* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i32 constant 65537 (0x00010001), i.e. the halfword 1 repeated
; twice. Expected codegen: vrepih materializes the replicated halfword and
; vstef stores word element 0.
define void @fun_2x2i(i32* %Dst) {
|
||||
; CHECK-LABEL: fun_2x2i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepih %v0, 1
|
||||
; CHECK-NEXT: vstef %v0, 0(%r2), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
store i32 65537, i32* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i64 constant 281479271743489 (0x0001000100010001), i.e. the
; halfword 1 repeated four times. Expected codegen: vrepih followed by vsteg
; of doubleword element 0.
define void @fun_4x2i(i64* %Dst) {
|
||||
; CHECK-LABEL: fun_4x2i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepih %v0, 1
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r2), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
store i64 281479271743489, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i64 constant 4294967297 (0x0000000100000001), i.e. the word 1
; repeated twice. Expected codegen: vrepif followed by vsteg of doubleword
; element 0.
define void @fun_2x4i(i64* %Dst) {
|
||||
; CHECK-LABEL: fun_2x4i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepif %v0, 1
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r2), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
store i64 4294967297, i64* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; Store replicated immediate twice using the same vector.
|
||||
; Stores the i32 constant 50529027 (0x03030303, the byte 3 repeated four
; times) to two destinations. Expected codegen: a single vrepib 3 whose
; result is reused by both vstef stores.
define void @fun_4x1i(i32* %Dst, i32* %Dst2) {
|
||||
; CHECK-LABEL: fun_4x1i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 3
|
||||
; CHECK-NEXT: vstef %v0, 0(%r2), 0
|
||||
; CHECK-NEXT: vstef %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
store i32 50529027, i32* %Dst
|
||||
store i32 50529027, i32* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores the i64 constant 72340172838076673 (0x0101010101010101, the byte 1
; repeated eight times) to two destinations. Expected codegen: a single
; vrepib 1 whose result is reused by both vsteg stores.
define void @fun_8x1i(i64* %Dst, i64* %Dst2) {
|
||||
; CHECK-LABEL: fun_8x1i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 1
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r2), 0
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
store i64 72340172838076673, i64* %Dst
|
||||
store i64 72340172838076673, i64* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Similar, but with vectors: a <4 x i32> splat and a <2 x i32> splat of the
; same constant 50529027 (0x03030303) are stored to different destinations.
; Expected codegen: one vrepib 3 shared by a 16-byte vst for the <4 x i32>
; and an 8-byte vsteg for the <2 x i32>.
|
||||
define void @fun_4Eltsx4x1i_2Eltsx4x1i(<4 x i32>* %Dst, <2 x i32>* %Dst2) {
|
||||
; CHECK-LABEL: fun_4Eltsx4x1i_2Eltsx4x1i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 3
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
|
||||
%Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
store <4 x i32> %Val, <4 x i32>* %Dst
|
||||
%tmp2 = insertelement <2 x i32> undef, i32 50529027, i32 0
|
||||
%Val2 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
store <2 x i32> %Val2, <2 x i32>* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Same, but the 64-bit store is scalar: the second store writes the i64
; constant 217020518514230019 (0x0303030303030303), which has the same byte
; pattern as the <4 x i32> splat of 50529027 (0x03030303).
; Expected codegen: one vrepib 3 shared by the vst and the vsteg.
|
||||
define void @fun_4Eltsx4x1i_8x1i(<4 x i32>* %Dst, i64* %Dst2) {
|
||||
; CHECK-LABEL: fun_4Eltsx4x1i_8x1i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepib %v0, 3
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 3
|
||||
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
|
||||
; CHECK-NEXT: br %r14
|
||||
%tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
|
||||
%Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
store <4 x i32> %Val, <4 x i32>* %Dst
|
||||
store i64 217020518514230019, i64* %Dst2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores a non-power-of-two <3 x i64> (24 bytes) splat of 4294967297
; (0x0000000100000001, the word 1 repeated twice).
; Expected codegen: one vrepif 1, then an 8-byte vsteg at offset 16 plus a
; 16-byte vst at offset 0.
define void @fun_3Eltsx2x4i(<3 x i64>* %Dst) {
|
||||
; CHECK-LABEL: fun_3Eltsx2x4i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vrepif %v0, 1
|
||||
; CHECK-NEXT: vsteg %v0, 16(%r2), 0
|
||||
; CHECK-NEXT: vst %v0, 0(%r2), 4
|
||||
; CHECK-NEXT: br %r14
|
||||
%tmp = insertelement <3 x i64> undef, i64 4294967297, i32 0
|
||||
%Val = shufflevector <3 x i64> %tmp, <3 x i64> undef, <3 x i32> zeroinitializer
|
||||
store <3 x i64> %Val, <3 x i64>* %Dst
|
||||
ret void
|
||||
}
|
||||
|
||||
; i128 replicated '1': not using vrepib, but should compile. The constant is
; the byte 1 repeated sixteen times. The expected codegen builds
; 0x0101010101010101 in a GPR (llihf/oilf, each with 16843009 = 0x01010101)
; and writes it with two scalar stg stores at offsets 8 and 0.
|
||||
define void @fun_16x1i(i128* %Dst) {
|
||||
; CHECK-LABEL: fun_16x1i:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: llihf %r0, 16843009
|
||||
; CHECK-NEXT: oilf %r0, 16843009
|
||||
; CHECK-NEXT: stg %r0, 8(%r2)
|
||||
; CHECK-NEXT: stg %r0, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
store i128 1334440654591915542993625911497130241, i128* %Dst
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue