[SystemZ] Add support for new cpu architecture - arch13
This patch series adds support for the next-generation arch13 CPU architecture to the SystemZ backend. This includes:
- Basic support for the new processor and its features.
- Assembler/disassembler support for new instructions.
- CodeGen for new instructions, including new LLVM intrinsics.
- Scheduler description for the new processor.
- Detection of arch13 as host processor.

Note: No currently available Z system supports the arch13 architecture. Once new systems become available, the official system name will be added as a supported -march name.

llvm-svn: 365932
commit 0f0a8b7784 (parent 223573c8ba)
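As a rough illustration of the new CodeGen support, the sketch below exercises one of the new Vector Enhancements Facility 2 builtins added in this series. It is an assumption-based example, not part of the patch: it presumes the companion clang change exposes __builtin_s390_vsld with a prototype matching the LLVM intrinsic defined in the first hunk below, and that the driver accepts -march=arch13.

    /* Hypothetical usage sketch; builds only with a compiler that has the
       arch13 support from this series (e.g. clang -march=arch13 -c vsld_demo.c). */
    typedef unsigned char uv16qi __attribute__((vector_size(16)));

    /* Shift the 256-bit concatenation of a and b left by 3 bits and keep
       the high 128 bits; expected to map to the new VSLD instruction. */
    uv16qi shift_left_double(uv16qi a, uv16qi b) {
      return __builtin_s390_vsld(a, b, 3);
    }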
@@ -48,6 +48,9 @@ class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
: GCCBuiltin<"__builtin_s390_" ## name>,
Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;

class SystemZTernaryConvCC<LLVMType result, LLVMType arg>
: Intrinsic<[result, llvm_i32_ty], [arg, arg, result], [IntrNoMem]>;

class SystemZTernary<string name, LLVMType type>
: SystemZTernaryConv<name, type, type>;

@@ -415,6 +418,24 @@ let TargetPrefix = "s390" in {
def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;

// Instructions from the Vector Enhancements Facility 2
def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<2>]>;

def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<2>]>;

def int_s390_vstrsb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
def int_s390_vstrsh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
def int_s390_vstrsf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
def int_s390_vstrszb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
def int_s390_vstrszh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
}

//===----------------------------------------------------------------------===//
@@ -315,6 +315,8 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
if (Id >= 8561 && HaveVectorSupport)
return "arch13";
if (Id >= 3906 && HaveVectorSupport)
return "z14";
if (Id >= 2964 && HaveVectorSupport)
@@ -239,6 +239,51 @@ def Arch12NewFeatures : SystemZFeatureList<[
FeatureInsertReferenceBitsMultiple
]>;

//===----------------------------------------------------------------------===//
//
// New features added in the Thirteenth Edition of the z/Architecture
//
//===----------------------------------------------------------------------===//

def FeatureMiscellaneousExtensions3 : SystemZFeature<
"miscellaneous-extensions-3", "MiscellaneousExtensions3",
"Assume that the miscellaneous-extensions facility 3 is installed"
>;

def FeatureMessageSecurityAssist9 : SystemZFeature<
"message-security-assist-extension9", "MessageSecurityAssist9",
"Assume that the message-security-assist extension facility 9 is installed"
>;

def FeatureVectorEnhancements2 : SystemZFeature<
"vector-enhancements-2", "VectorEnhancements2",
"Assume that the vector enhancements facility 2 is installed"
>;

def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
"vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
"Assume that the vector packed decimal enhancement facility is installed"
>;

def FeatureEnhancedSort : SystemZFeature<
"enhanced-sort", "EnhancedSort",
"Assume that the enhanced-sort facility is installed"
>;

def FeatureDeflateConversion : SystemZFeature<
"deflate-conversion", "DeflateConversion",
"Assume that the deflate-conversion facility is installed"
>;

def Arch13NewFeatures : SystemZFeatureList<[
FeatureMiscellaneousExtensions3,
FeatureMessageSecurityAssist9,
FeatureVectorEnhancements2,
FeatureVectorPackedDecimalEnhancement,
FeatureEnhancedSort,
FeatureDeflateConversion
]>;

//===----------------------------------------------------------------------===//
//
// Cumulative supported and unsupported feature sets
@@ -255,9 +300,13 @@ def Arch11SupportedFeatures
: SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
def Arch12SupportedFeatures
: SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>;
def Arch13SupportedFeatures
: SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>;

def Arch12UnsupportedFeatures
def Arch13UnsupportedFeatures
: SystemZFeatureList<[]>;
def Arch12UnsupportedFeatures
: SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>;
def Arch11UnsupportedFeatures
: SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>;
def Arch10UnsupportedFeatures
@@ -1480,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
Node->getOperand(0).getOpcode() != ISD::Constant)
if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
// Don't split the operation if we can match one of the combined
// logical operations provided by miscellaneous-extensions-3.
if (Subtarget->hasMiscellaneousExtensions3()) {
unsigned ChildOpcode = Node->getOperand(0).getOpcode();
// Check whether this expression matches NAND/NOR/NXOR.
if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
ChildOpcode == ISD::XOR)
break;
// Check whether this expression matches OR-with-complement.
if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
auto Op0 = Node->getOperand(0);
if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
if (Op0Op1->getZExtValue() == (uint64_t)-1)
break;
}
}
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
splitLargeImmediate(Opcode, Node, Node->getOperand(0),
Val - uint32_t(Val), uint32_t(Val));
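The check above that recognizes NAND/NOR/NXOR and OR-with-complement shapes is easiest to see from the source side. A minimal, hedged sketch (assuming -march=arch13 so that miscellaneous-extensions-3 is enabled; instruction names per the SystemZInstrInfo.td hunk further down):

    #include <stdint.h>

    /* Without arch13 the 64-bit all-ones constant would be split by
       splitLargeImmediate(); with miscellaneous-extensions-3 the xor with
       -1 is kept intact so the combined NOR instruction (NOGRK) can match. */
    uint64_t nor64(uint64_t a, uint64_t b) {
      return ~(a | b);
    }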
@@ -252,6 +252,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);

// On arch13 we have native support for a 64-bit CTPOP.
if (Subtarget.hasMiscellaneousExtensions3()) {
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
}

// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);

@@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}

if (Subtarget.hasVectorEnhancements2()) {
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
}

// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;

@@ -576,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
@@ -1809,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
CCValid = SystemZ::CCMASK_ANY;
return true;

case Intrinsic::s390_vstrsb:
case Intrinsic::s390_vstrsh:
case Intrinsic::s390_vstrsf:
Opcode = SystemZISD::VSTRS_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;

case Intrinsic::s390_vstrszb:
case Intrinsic::s390_vstrszh:
case Intrinsic::s390_vstrszf:
Opcode = SystemZISD::VSTRSZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;

case Intrinsic::s390_vfcedbs:
case Intrinsic::s390_vfcesbs:
Opcode = SystemZISD::VFCMPES;
@@ -4506,9 +4538,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}

bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
return true;
if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
return true;
return false;
}

// Combine GPR scalar values Elems into a vector of type VT.
static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) {
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const {
// See whether there is a single replicated value.
SDValue Single;
unsigned int NumElements = Elems.size();

@@ -4537,13 +4578,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}

@@ -4615,8 +4656,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
if (Elems[I].getOpcode() == ISD::LOAD &&
cast<LoadSDNode>(Elems[I])->isUnindexed()) {
if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -5152,6 +5192,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VISTR_CC);
OPCODE(VSTRC_CC);
OPCODE(VSTRCZ_CC);
OPCODE(VSTRS_CC);
OPCODE(VSTRSZ_CC);
OPCODE(TDC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);

@@ -5171,6 +5213,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_CMP_SWAP_128);
OPCODE(LRV);
OPCODE(STRV);
OPCODE(VLER);
OPCODE(VSTER);
OPCODE(PREFETCH);
}
return nullptr;
@@ -5484,6 +5528,31 @@ SDValue SystemZTargetLowering::combineLOAD(
return SDValue(N, 0);
}

bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
return true;
if (Subtarget.hasVectorEnhancements2())
if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
return true;
return false;
}

static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
if (!VT.isVector() || !VT.isSimple() ||
VT.getSizeInBits() != 128 ||
VT.getScalarSizeInBits() % 8 != 0)
return false;

unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != NumElts - 1 - i)
return false;
}

return true;
}

SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;

@@ -5505,13 +5574,11 @@ SDValue SystemZTargetLowering::combineSTORE(
SN->getMemOperand());
}
}
// Combine STORE (BSWAP) into STRVH/STRV/STRVG
// Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
if (!SN->isTruncatingStore() &&
Op1.getOpcode() == ISD::BSWAP &&
Op1.getNode()->hasOneUse() &&
(Op1.getValueType() == MVT::i16 ||
Op1.getValueType() == MVT::i32 ||
Op1.getValueType() == MVT::i64)) {
canLoadStoreByteSwapped(Op1.getValueType())) {

SDValue BSwapOp = Op1.getOperand(0);

@@ -5526,15 +5593,97 @@ SDValue SystemZTargetLowering::combineSTORE(
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
Ops, MemVT, SN->getMemOperand());
}
// Combine STORE (element-swap) into VSTER
if (!SN->isTruncatingStore() &&
Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
Op1.getNode()->hasOneUse() &&
Subtarget.hasVectorEnhancements2()) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
ArrayRef<int> ShuffleMask = SVN->getMask();
if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
SDValue Ops[] = {
N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
};

return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
DAG.getVTList(MVT::Other),
Ops, MemVT, SN->getMemOperand());
}
}

return SDValue();
}

SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine element-swap (LOAD) into VLER
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
Subtarget.hasVectorEnhancements2()) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
ArrayRef<int> ShuffleMask = SVN->getMask();
if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);

// Create the element-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr() // Ptr
};
SDValue ESLoad =
DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
DAG.getVTList(LD->getValueType(0), MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());

// First, combine the VECTOR_SHUFFLE away. This makes the value produced
// by the load dead.
DCI.CombineTo(N, ESLoad);

// Next, combine the load away, we give it a bogus result value but a real
// chain result. The result value is dead because the shuffle is dead.
DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));

// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
}

return SDValue();
}

SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;

if (!Subtarget.hasVector())
return SDValue();

// Look through bitcasts that retain the number of vector elements.
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BITCAST &&
Op.getValueType().isVector() &&
Op.getOperand(0).getValueType().isVector() &&
Op.getValueType().getVectorNumElements() ==
Op.getOperand(0).getValueType().getVectorNumElements())
Op = Op.getOperand(0);

// Pull BSWAP out of a vector extraction.
if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
EVT VecVT = Op.getValueType();
EVT EltVT = VecVT.getVectorElementType();
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
Op.getOperand(0), N->getOperand(1));
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
if (EltVT != N->getValueType(0)) {
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
}
return Op;
}

// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
@@ -5660,11 +5809,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
N->getValueType(0) == MVT::i64)) {
canLoadStoreByteSwapped(N->getValueType(0))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);

@@ -5697,6 +5845,74 @@ SDValue SystemZTargetLowering::combineBSWAP(
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}

// Look through bitcasts that retain the number of vector elements.
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BITCAST &&
Op.getValueType().isVector() &&
Op.getOperand(0).getValueType().isVector() &&
Op.getValueType().getVectorNumElements() ==
Op.getOperand(0).getValueType().getVectorNumElements())
Op = Op.getOperand(0);

// Push BSWAP into a vector insertion if at least one side then simplifies.
if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
SDValue Vec = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);

if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
(canLoadStoreByteSwapped(N->getValueType(0)) &&
ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
EVT VecVT = N->getValueType(0);
EVT EltVT = N->getValueType(0).getVectorElementType();
if (VecVT != Vec.getValueType()) {
Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
DCI.AddToWorklist(Vec.getNode());
}
if (EltVT != Elt.getValueType()) {
Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
DCI.AddToWorklist(Elt.getNode());
}
Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
DCI.AddToWorklist(Vec.getNode());
Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
DCI.AddToWorklist(Elt.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
Vec, Elt, Idx);
}
}

// Push BSWAP into a vector shuffle if at least one side then simplifies.
ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
if (SV && Op.hasOneUse()) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);

if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
EVT VecVT = N->getValueType(0);
if (VecVT != Op0.getValueType()) {
Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
DCI.AddToWorklist(Op0.getNode());
}
if (VecVT != Op1.getValueType()) {
Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
DCI.AddToWorklist(Op1.getNode());
}
Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
DCI.AddToWorklist(Op0.getNode());
Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
DCI.AddToWorklist(Op1.getNode());
return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
}
}

return SDValue();
}

@@ -5919,6 +6135,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
@@ -281,6 +281,8 @@ enum NodeType : unsigned {
VISTR_CC,
VSTRC_CC,
VSTRCZ_CC,
VSTRS_CC,
VSTRSZ_CC,

// Test Data Class.
//

@@ -340,6 +342,9 @@ enum NodeType : unsigned {
// Byte swapping load/store.  Same operands as regular load/store.
LRV, STRV,

// Element swapping load/store.  Same operands as regular load/store.
VLER, VSTER,

// Prefetch from the second operand using the 4-bit control code in
// the first operand.  The code is 1 for a load prefetch and 2 for
// a store prefetch.

@@ -571,6 +576,9 @@ private:
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isVectorElementLoad(SDValue Op) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;

@@ -590,8 +598,10 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
bool canLoadStoreByteSwapped(EVT VT) const;
SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -1414,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<5> V2;
bits<4> M3;
bits<4> M4;

let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = V2{3-0};
let Inst{31-24} = 0;
let Inst{23-20} = M3;
let Inst{19-12} = 0;
let Inst{19-16} = M4;
let Inst{15-12} = 0;
let Inst{11} = 0;
let Inst{10} = V2{4};
let Inst{9-8} = 0;
@@ -2489,12 +2491,18 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes, bits<4> type = 0>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
mnemonic#"\t$V1, $XBD2",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> {
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
let M3 = type;
let mayStore = 1;
let AccessBytes = bytes;
}

class StoreVRXGeneric<string mnemonic, bits<16> opcode>
: InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", []> {
let mayStore = 1;
}

multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> {
let mayStore = 1, AccessBytes = 16 in {
def Align : InstVRX<opcode, (outs),
@@ -3151,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = 0;
}

class BinaryRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3),
mnemonic#"\t$R1, $R2, $M3", []>;

class BinaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
@@ -3218,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
}

class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2, RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1),
(ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
mnemonic#"$M4\t$R1, $R2, $R3",
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
cond4:$valid, cond4:$M4))]> {
let CCMaskLast = 1;
}

// Like CondBinaryRRFa, but used for the raw assembly form.  The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2, RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4),
mnemonic#"\t$R1, $R2, $R3, $M4", []>;

// Like CondBinaryRRFa, but with a fixed CC mask.
class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2),
mnemonic#V.suffix#"\t$R1, $R2, $R3", []> {
let isAsmParserOnly = V.alternate;
let M4 = V.ccmask;
}

multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3> {
let isCodeGenOnly = 1 in
def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
}

class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@@ -3612,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,

class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3),
mnemonic#"\t$R1, $V2, $M3", []>;
mnemonic#"\t$R1, $V2, $M3", []> {
let M4 = 0;
}

class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
@@ -3990,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
let M4 = 0;
}

class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1, cls2:$R2),
(ins cls1:$R1src, cls2:$R2src, cls3:$R3),
mnemonic#"\t$R1, $R2, $R3", []> {
let Constraints = "$R1 = $R1src, $R2 = $R2src";
let DisableEncoding = "$R1src, $R2src";
let M4 = 0;
}

class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
@@ -4278,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode>
mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;

class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0>
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
mnemonic#"\t$V1, $V2, $V3, $V4",

@@ -4286,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(tr2.vt tr2.op:$V3),
(tr1.vt tr1.op:$V4)))]> {
let M5 = type;
let M6 = 0;
let M6 = m6;
}

class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
@@ -4296,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
let M6 = 0;
}

// Ternary operation where the assembler mnemonic has an extra operand to
// optionally allow specifiying arbitrary M6 values.
multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type> {
let M5 = type, Defs = [CC] in
def "" : InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>;
def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3),
(tr1.vt tr1.op:$V4)),
(!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr1.op:$V4, 0)>;
}

multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
let Defs = [CC] in
def "" : InstVRRd<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3, VR128:$V4,
imm32zx4:$M5, imm32zx4:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
}

class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
: InstVRRe<opcode, (outs tr1.op:$V1),
@@ -4326,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}

class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2,
imm32zx4:$M3, imm32zx4:$M4),
mnemonic#"\t$R1, $V2, $M3, $M4", []>;

class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
: InstVRSb<opcode, (outs VR128:$V1),
(ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
@@ -4705,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
let CCMaskLast = 1;
}

// Like CondBinaryRRFa, but expanded after RA depending on the choice of
// register.
class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
: Pseudo<(outs cls1:$R1),
(ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
cond4:$valid, cond4:$M4))]> {
let CCMaskLast = 1;
}

// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
@@ -223,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
// correctly.  This change is defered to the SystemZExpandPseudo pass.
}

// MI is a select pseudo instruction.  Replace it with LowOpcode if source
// and destination are all low GR32s and HighOpcode if source and destination
// are all high GR32s.  Otherwise, use the two-operand MixedOpcode.
void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
                                        unsigned HighOpcode,
                                        unsigned MixedOpcode) const {
unsigned DestReg = MI.getOperand(0).getReg();
unsigned Src1Reg = MI.getOperand(1).getReg();
unsigned Src2Reg = MI.getOperand(2).getReg();
bool DestIsHigh = isHighReg(DestReg);
bool Src1IsHigh = isHighReg(Src1Reg);
bool Src2IsHigh = isHighReg(Src2Reg);

// If sources and destination aren't all high or all low, we may be able to
// simplify the operation by moving one of the sources to the destination
// first.  But only if this doesn't clobber the other source.
if (DestReg != Src1Reg && DestReg != Src2Reg) {
if (DestIsHigh != Src1IsHigh) {
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg,
              SystemZ::LR, 32, MI.getOperand(1).isKill(),
              MI.getOperand(1).isUndef());
MI.getOperand(1).setReg(DestReg);
Src1Reg = DestReg;
Src1IsHigh = DestIsHigh;
} else if (DestIsHigh != Src2IsHigh) {
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg,
              SystemZ::LR, 32, MI.getOperand(2).isKill(),
              MI.getOperand(2).isUndef());
MI.getOperand(2).setReg(DestReg);
Src2Reg = DestReg;
Src2IsHigh = DestIsHigh;
}
}

// If the destination (now) matches one source, prefer this to be first.
if (DestReg != Src1Reg && DestReg == Src2Reg) {
commuteInstruction(MI, false, 1, 2);
std::swap(Src1Reg, Src2Reg);
std::swap(Src1IsHigh, Src2IsHigh);
}

if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
MI.setDesc(get(LowOpcode));
else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
MI.setDesc(get(HighOpcode));
else {
// Given the simplifcation above, we must already have a two-operand case.
assert (DestReg == Src1Reg);
MI.setDesc(get(MixedOpcode));
MI.tieOperands(0, 1);
LOCRMuxJumps++;
}

// If we were unable to implement the pseudo with a single instruction, we
// need to convert it back into a branch sequence.  This cannot be done here
// since the caller of expandPostRAPseudo does not handle changes to the CFG
// correctly.  This change is defered to the SystemZExpandPseudo pass.
}

// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another.  Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@@ -312,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
};

switch (MI.getOpcode()) {
case SystemZ::SELRMux:
case SystemZ::SELFHR:
case SystemZ::SELR:
case SystemZ::SELGR:
case SystemZ::LOCRMux:
case SystemZ::LOCFHR:
case SystemZ::LOCR:
@@ -606,7 +669,9 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,

unsigned Opc;
if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
if (STI.hasLoadStoreOnCond2())
if (STI.hasMiscellaneousExtensions3())
Opc = SystemZ::SELRMux;
else if (STI.hasLoadStoreOnCond2())
Opc = SystemZ::LOCRMux;
else {
Opc = SystemZ::LOCR;

@@ -618,9 +683,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
TrueReg = TReg;
FalseReg = FReg;
}
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
Opc = SystemZ::LOCGR;
else
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
if (STI.hasMiscellaneousExtensions3())
Opc = SystemZ::SELGR;
else
Opc = SystemZ::LOCGR;
} else
llvm_unreachable("Invalid register class");

BuildMI(MBB, I, DL, get(Opc), DstReg)
@@ -643,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewUseOpc;
unsigned UseIdx;
int CommuteIdx = -1;
bool TieOps = false;
switch (UseOpc) {
case SystemZ::SELRMux:
TieOps = true;
/* fall through */
case SystemZ::LOCRMux:
if (!STI.hasLoadStoreOnCond2())
return false;

@@ -655,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
else
return false;
break;
case SystemZ::SELGR:
TieOps = true;
/* fall through */
case SystemZ::LOCGR:
if (!STI.hasLoadStoreOnCond2())
return false;

@@ -676,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,

bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.setDesc(get(NewUseOpc));
if (TieOps)
UseMI.tieOperands(0, 1);
UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
if (DeleteDef)
DefMI.eraseFromParent();
@@ -1285,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
return true;

case SystemZ::SELRMux:
expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR,
                 SystemZ::LOCRMux);
return true;

case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
@@ -162,6 +162,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                        unsigned HighOpcode) const;
void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
                      unsigned HighOpcode) const;
void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
                      unsigned HighOpcode, unsigned MixedOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                      unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
@@ -474,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
}

// Move right.
let Predicates = [FeatureMiscellaneousExtensions3],
mayLoad = 1, mayStore = 1, Uses = [R0L] in
def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>;

// String moves.
let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
@@ -482,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
// Conditional move instructions
//===----------------------------------------------------------------------===//

let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
// Select.
let isCommutable = 1 in {
// Expands to SELR or SELFHR or a branch-and-move sequence,
// depending on the choice of registers.
def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
}

// Define AsmParser extended mnemonics for each general condition-code mask.
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
              "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0,
                                    GR32, GR32, GR32>;
def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0,
                                      GRH32, GRH32, GRH32>;
def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3,
                                     GR64, GR64, GR64>;
}
}

let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load immediate on condition.  Matched via DAG pattern and created
// by the PeepholeOptimizer via FoldImmediate.
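For orientation, a hedged sketch of the kind of source that benefits from the new select instructions (an illustration, not a guarantee from this patch): when the operands end up in low GR32s, the SELRMux pseudo above expands to a single SELR instead of a LOCR or a branch-and-move sequence.

    /* May compile to a compare against zero followed by one SELR on arch13. */
    int pick(int cond, int a, int b) {
      return cond ? a : b;
    }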
@@ -1243,6 +1271,43 @@ let Defs = [CC] in {
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;

//===----------------------------------------------------------------------===//
// Combined logical operations
//===----------------------------------------------------------------------===//

let Predicates = [FeatureMiscellaneousExtensions3],
    Defs = [CC] in {
// AND with complement.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NCRK  : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>;
def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>;
}

// OR with complement.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def OCRK  : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>;
def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>;
}

// NAND.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NNRK  : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>;
def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>;
}

// NOR.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NORK  : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>;
def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>;
}

// NXOR.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NXRK  : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>;
def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>;
}
}

//===----------------------------------------------------------------------===//
// Multiplication
//===----------------------------------------------------------------------===//
@@ -1837,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
let Predicates = [FeatureMessageSecurityAssist8] in
def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929,
                                         GR128, GR128, GR128>;

let Predicates = [FeatureMessageSecurityAssist9] in
def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>;
}

//===----------------------------------------------------------------------===//
@@ -2017,7 +2085,12 @@ let Defs = [CC] in
def : Pat<(ctlz GR64:$src),
          (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;

// Population count.  Counts bits set per byte.
// Population count.  Counts bits set per byte or doubleword.
let Predicates = [FeatureMiscellaneousExtensions3] in {
let Defs = [CC] in
def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>;
def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>;
}
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;

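A quick way to see the effect of the new doubleword form (a hedged sketch; assumes -march=arch13): the generic population-count builtin now lowers through the (ctpop GR64) pattern above to a single POPCNT with M3=8 instead of the per-byte POPCNT followed by a summation sequence.

    #include <stdint.h>

    /* One POPCNT ...,8 on arch13; a multi-instruction byte-sum sequence
       on earlier processors. */
    unsigned popcount64(uint64_t x) {
      return (unsigned)__builtin_popcountll(x);
    }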
@@ -2048,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;

// Sort lists.
let Predicates = [FeatureEnhancedSort],
    mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>;

// Deflate conversion call.
let Predicates = [FeatureDeflateConversion],
    mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939,
                                         GR128, GR128, GR64>;

// Execute.
let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
@@ -248,6 +248,81 @@ let Predicates = [FeatureVectorPackedDecimal] in {
def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
}

//===----------------------------------------------------------------------===//
// Byte swaps
//===----------------------------------------------------------------------===//

let Predicates = [FeatureVectorEnhancements2] in {
// Load byte-reversed elements.
def VLBR  : UnaryVRXGeneric<"vlbr", 0xE606>;
def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>;

// Load elements reversed.
def VLER  : UnaryVRXGeneric<"vler", 0xE607>;
def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>;
def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>;
def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>;
def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)),
          (VLERF bdxaddr12only:$addr)>;
def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)),
          (VLERG bdxaddr12only:$addr)>;
def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)),
          (VLBRQ bdxaddr12only:$addr)>;

// Load byte-reversed element.
def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>;
def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>;
def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>;

// Load byte-reversed element and zero.
def VLLEBRZ  : UnaryVRXGeneric<"vllebrz", 0xE604>;
def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>;
def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>;
def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>;
def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>;
def : InstAlias<"lerv\t$V1, $XBD2",
                (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>;
def : InstAlias<"ldrv\t$V1, $XBD2",
                (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>;

// Load byte-reversed element and replicate.
def VLBRREP  : UnaryVRXGeneric<"vlbrrep", 0xE605>;
def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>;
def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>;
def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>;

// Store byte-reversed elements.
def VSTBR  : StoreVRXGeneric<"vstbr", 0xE60E>;
def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>;
def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>;
def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>;
def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>;

// Store elements reversed.
def VSTER  : StoreVRXGeneric<"vster", 0xE60F>;
def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>;
def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>;
def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>;
def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr),
          (VSTERF VR128:$val, bdxaddr12only:$addr)>;
def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr),
          (VSTERG VR128:$val, bdxaddr12only:$addr)>;
def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr),
          (VSTBRQ VR128:$val, bdxaddr12only:$addr)>;

// Store byte-reversed element.
def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>;
def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>;
def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>;
def : InstAlias<"sterv\t$V1, $XBD2",
                (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
def : InstAlias<"stdrv\t$V1, $XBD2",
                (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
}

//===----------------------------------------------------------------------===//
// Selects and permutes
//===----------------------------------------------------------------------===//
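As a loosely held illustration of what the byte-swapping vector loads and stores are for (an assumption-laden sketch, not something this patch guarantees for every compiler version): code that byte-reverses each element of a 128-bit vector can now stay in the vector unit, for example via VLBRF/VSTBRF, instead of bouncing through GPR loads with LRV/STRV.

    typedef unsigned int uv4si __attribute__((vector_size(16)));

    /* Byte-swap each 32-bit element.  With vector-enhancements-2 the intent
       is that the load-plus-bswap side can become a single VLBRF (and a
       mirrored store a single VSTBRF); whether this exact snippet does so
       depends on the DAG combines shown earlier. */
    uv4si bswap_elems(const uv4si *p) {
      uv4si v = *p;
      return (uv4si){__builtin_bswap32(v[0]), __builtin_bswap32(v[1]),
                     __builtin_bswap32(v[2]), __builtin_bswap32(v[3])};
    }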
@@ -706,6 +781,10 @@ let Predicates = [FeatureVector] in {
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
          (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;

// Shift left double by bit.
let Predicates = [FeatureVectorEnhancements2] in
def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>;

// Shift right arithmetic.
def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;

@@ -718,6 +797,10 @@ let Predicates = [FeatureVector] in {
// Shift right logical by byte.
def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;

// Shift right double by bit.
let Predicates = [FeatureVectorEnhancements2] in
def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>;

// Subtract.
def VS  : BinaryVRRcGeneric<"vs", 0xE7F7>;
def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
@@ -945,23 +1028,41 @@ let Predicates = [FeatureVector] in {
}
}

// Convert from fixed 64-bit.
// Convert from fixed.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDG  : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
}
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>;
def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
}
def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
}

// Convert from logical 64-bit.
// Convert from logical.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDLG  : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
}
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>;
def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
}
def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
}

// Convert to fixed 64-bit.
// Convert to fixed.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCGD  : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
@@ -969,8 +1070,18 @@ let Predicates = [FeatureVector] in {
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>;
def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>;
def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>;
}

// Convert to logical 64-bit.
// Convert to logical.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCLGD  : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;

@@ -978,6 +1089,16 @@ let Predicates = [FeatureVector] in {
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>;
def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>;
def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>;
}

// Divide.
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1568,6 +1689,24 @@ let Predicates = [FeatureVector] in {
                          z_vstrcz_cc, v128f, v128f, 2, 2>;
}

let Predicates = [FeatureVectorEnhancements2] in {
defm VSTRS  : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>;
defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B,
                               z_vstrs_cc, v128b, v128b, 0>;
defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B,
                               z_vstrs_cc, v128b, v128h, 1>;
defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B,
                               z_vstrs_cc, v128b, v128f, 2>;
let Defs = [CC] in {
def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B,
                          z_vstrsz_cc, v128b, v128b, 0, 2>;
def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B,
                          z_vstrsz_cc, v128b, v128h, 1, 2>;
def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B,
                          z_vstrsz_cc, v128b, v128f, 2, 2>;
}
}

//===----------------------------------------------------------------------===//
// Packed-decimal instructions
//===----------------------------------------------------------------------===//
@@ -1579,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDecimal] in {
def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>;

let Defs = [CC] in {
let Predicates = [FeatureVectorPackedDecimalEnhancement] in {
def VCVBOpt  : TernaryVRRi<"vcvb", 0xE650, GR32>;
def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>;
}
def VCVB  : BinaryVRRi<"vcvb", 0xE650, GR32>;
def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>;
def VCVD  : TernaryVRIi<"vcvd", 0xE658, GR32>;
@@ -191,6 +191,12 @@ def SDT_ZVecTernary : SDTypeProfile<1, 3,
                                    SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisSameAs<0, 3>]>;
def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3,
                                          [SDTCisVec<0>,
                                           SDTCisVT<1, i32>,
                                           SDTCisVec<2>,
                                           SDTCisSameAs<2, 3>,
                                           SDTCisSameAs<0, 4>]>;
def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
                                       [SDTCisVec<0>,
                                        SDTCisSameAs<0, 1>,

@@ -278,6 +284,10 @@ def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;

@ -337,6 +347,10 @@ def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC",
|
|||
SDT_ZVecQuaternaryIntCC>;
|
||||
def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
|
||||
SDT_ZVecQuaternaryIntCC>;
|
||||
def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC",
|
||||
SDT_ZVecTernaryConvCC>;
|
||||
def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC",
|
||||
SDT_ZVecTernaryConvCC>;
|
||||
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
|
||||
|
||||
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
|
||||
|
@ -661,6 +675,18 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2),
|
|||
[(z_usubo node:$src1, node:$src2),
|
||||
(sub node:$src1, node:$src2)]>;
|
||||
|
||||
// Combined logical operations.
|
||||
def andc : PatFrag<(ops node:$src1, node:$src2),
|
||||
(and node:$src1, (not node:$src2))>;
|
||||
def orc : PatFrag<(ops node:$src1, node:$src2),
|
||||
(or node:$src1, (not node:$src2))>;
|
||||
def nand : PatFrag<(ops node:$src1, node:$src2),
|
||||
(not (and node:$src1, node:$src2))>;
|
||||
def nor : PatFrag<(ops node:$src1, node:$src2),
|
||||
(not (or node:$src1, node:$src2))>;
|
||||
def nxor : PatFrag<(ops node:$src1, node:$src2),
|
||||
(not (xor node:$src1, node:$src2))>;
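
For orientation, here is a minimal, hypothetical LLVM IR sketch of the DAG shapes these combined-logical PatFrags (andc, orc, nand, nor, nxor) are written to match; the function names are illustrative only, and the assumption is that the (suppressed) instruction-selection patterns use these fragments to form single arch13 combined logical instructions when the miscellaneous-extensions facility 3 is available:

; Hypothetical examples; each two-instruction sequence below matches one of
; the PatFrags defined above.
define i64 @nand_shape(i64 %a, i64 %b) {
  %t = and i64 %a, %b
  %res = xor i64 %t, -1        ; (not (and a, b)) -- the nand PatFrag
  ret i64 %res
}

define i64 @orc_shape(i64 %a, i64 %b) {
  %notb = xor i64 %b, -1
  %res = or i64 %a, %notb      ; (or a, (not b)) -- the orc PatFrag
  ret i64 %res
}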

// Fused multiply-subtract, using the natural operand order.
def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                      (any_fma node:$src1, node:$src2, (fneg node:$src3))>;

@@ -722,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_load<i32, load>;
def z_replicate_loadi64 : z_replicate_load<i64, load>;
def z_replicate_loadf32 : z_replicate_load<f32, load>;
def z_replicate_loadf64 : z_replicate_load<f64, load>;
// Byte-swapped replicated vector element loads.
def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>;
def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>;
def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>;

// Load a scalar and insert it into a single element of a vector.
class z_vle<ValueType scalartype, SDPatternOperator load>

@@ -734,6 +764,10 @@ def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
def z_vlef32 : z_vle<f32, load>;
def z_vlef64 : z_vle<f64, load>;
// Byte-swapped vector element loads.
def z_vlebri16 : z_vle<i32, z_loadbswap16>;
def z_vlebri32 : z_vle<i32, z_loadbswap32>;
def z_vlebri64 : z_vle<i64, z_loadbswap64>;

// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.

@@ -778,6 +812,18 @@ def z_vllezlf32 : PatFrag<(ops node:$addr),
                           (v2i64
                            (bitconvert (v4f32 immAllZerosV))))>;

// Byte-swapped variants.
def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>;
def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>;
def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>;
def z_vllebrzi64 : PatFrags<(ops node:$addr),
                            [(z_vector_insert immAllZerosV,
                                              (i64 (z_loadbswap64 node:$addr)),
                                              (i32 0)),
                             (z_join_dwords (i64 (z_loadbswap64 node:$addr)),
                                            (i64 0))]>;

// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
  : PatFrag<(ops node:$vec, node:$addr, node:$index),

@@ -789,6 +835,10 @@ def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
def z_vstef32 : z_vste<f32, store>;
def z_vstef64 : z_vste<f64, store>;
// Byte-swapped vector element stores.
def z_vstebri16 : z_vste<i32, z_storebswap16>;
def z_vstebri32 : z_vste<i32, z_storebswap32>;
def z_vstebri64 : z_vste<i64, z_storebswap64>;

// Arithmetic negation on vectors.
def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;

@@ -35,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;

def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>;
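
As a usage note (a sketch, not part of the patch): the new processor model is selected like the existing ones, either with -mcpu=arch13 on the llc/opt command lines as in the tests further below, or per function via the "target-cpu" attribute. A hypothetical IR example:

; Hypothetical function; the attribute string matches the one used by the
; MIR test at the end of this series.
define void @uses_arch13() #0 {
  ret void
}
attributes #0 = { "target-cpu"="arch13" }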

@@ -96,17 +96,21 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
      if (!DoneRegs.insert(Reg).second)
        continue;

      for (auto &Use : MRI->use_instructions(Reg)) {
      for (auto &Use : MRI->reg_instructions(Reg)) {
        // For LOCRMux, see if the other operand is already a high or low
        // register, and in that case give the correpsonding hints for
        // register, and in that case give the corresponding hints for
        // VirtReg. LOCR instructions need both operands in either high or
        // low parts.
        if (Use.getOpcode() == SystemZ::LOCRMux) {
        // low parts. Same handling for SELRMux.
        if (Use.getOpcode() == SystemZ::LOCRMux ||
            Use.getOpcode() == SystemZ::SELRMux) {
          MachineOperand &TrueMO = Use.getOperand(1);
          MachineOperand &FalseMO = Use.getOperand(2);
          const TargetRegisterClass *RC =
            TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
                                   getRC32(TrueMO, VRM, MRI));
          if (Use.getOpcode() == SystemZ::SELRMux)
            RC = TRI->getCommonSubClass(RC,
                                        getRC32(Use.getOperand(0), VRM, MRI));
          if (RC && RC != &SystemZ::GRX32BitRegClass) {
            addHints(Order, Hints, RC, MRI);
            // Return true to make these hints the only regs available to

@@ -59,6 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit

def MCD : SchedWrite; // Millicode

include "SystemZScheduleArch13.td"
include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
include "SystemZScheduleZEC12.td"

File diff suppressed because it is too large

@@ -46,6 +46,7 @@ private:
  bool shortenOn001(MachineInstr &MI, unsigned Opcode);
  bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
  bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
  bool shortenSelect(MachineInstr &MI, unsigned Opcode);

  const SystemZInstrInfo *TII;
  const TargetRegisterInfo *TRI;

@@ -175,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
  return false;
}

// MI is a three-operand select instruction. If one of the sources match
// the destination, convert to the equivalent load-on-condition.
bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
  if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
    MI.setDesc(TII->get(Opcode));
    MI.tieOperands(0, 1);
    return true;
  }
  if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
    TII->commuteInstruction(MI, false, 1, 2);
    MI.setDesc(TII->get(Opcode));
    MI.tieOperands(0, 1);
    return true;
  }
  return false;
}

// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
  bool Changed = false;

@@ -195,6 +213,18 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
      Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
      break;

    case SystemZ::SELR:
      Changed |= shortenSelect(MI, SystemZ::LOCR);
      break;

    case SystemZ::SELFHR:
      Changed |= shortenSelect(MI, SystemZ::LOCFHR);
      break;

    case SystemZ::SELGR:
      Changed |= shortenSelect(MI, SystemZ::LOCGR);
      break;

    case SystemZ::WFADB:
      Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
      break;

@@ -55,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
      HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false),
      HasVectorEnhancements1(false), HasVectorPackedDecimal(false),
      HasInsertReferenceBitsMultiple(false),
      HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
      HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
      HasEnhancedSort(false), HasDeflateConversion(false),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      TLInfo(TM, *this), TSInfo(), FrameLowering() {}

@@ -62,6 +62,12 @@ protected:
  bool HasVectorEnhancements1;
  bool HasVectorPackedDecimal;
  bool HasInsertReferenceBitsMultiple;
  bool HasMiscellaneousExtensions3;
  bool HasMessageSecurityAssist9;
  bool HasVectorEnhancements2;
  bool HasVectorPackedDecimalEnhancement;
  bool HasEnhancedSort;
  bool HasDeflateConversion;

private:
  Triple TargetTriple;

@@ -209,6 +215,30 @@ public:
    return HasInsertReferenceBitsMultiple;
  }

  // Return true if the target has the miscellaneous-extensions facility 3.
  bool hasMiscellaneousExtensions3() const {
    return HasMiscellaneousExtensions3;
  }

  // Return true if the target has the message-security-assist
  // extension facility 9.
  bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; }

  // Return true if the target has the vector-enhancements facility 2.
  bool hasVectorEnhancements2() const { return HasVectorEnhancements2; }

  // Return true if the target has the vector-packed-decimal
  // enhancement facility.
  bool hasVectorPackedDecimalEnhancement() const {
    return HasVectorPackedDecimalEnhancement;
  }

  // Return true if the target has the enhanced-sort facility.
  bool hasEnhancedSort() const { return HasEnhancedSort; }

  // Return true if the target has the deflate-conversion facility.
  bool hasDeflateConversion() const { return HasDeflateConversion; }

  // Return true if GV can be accessed using LARL for reloc model RM
  // and code model CM.
  bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;

@@ -466,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCost(
    if (Opcode == Instruction::FRem)
      return LIBCALL_COST;

    // Give discount for some combined logical operations if supported.
    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
      if (Opcode == Instruction::Xor) {
        for (const Value *A : Args) {
          if (const Instruction *I = dyn_cast<Instruction>(A))
            if (I->hasOneUse() &&
                (I->getOpcode() == Instruction::And ||
                 I->getOpcode() == Instruction::Or ||
                 I->getOpcode() == Instruction::Xor))
              return 0;
        }
      }
      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
        for (const Value *A : Args) {
          if (const Instruction *I = dyn_cast<Instruction>(A))
            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
              return 0;
        }
      }
    }

    // Or requires one instruction, although it has custom handling for i64.
    if (Opcode == Instruction::Or)
      return 1;

@@ -686,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
    // TODO: Fix base implementation which could simplify things a bit here
    // (seems to miss on differentiating on scalar/vector types).

    // Only 64 bit vector conversions are natively supported.
    if (DstScalarBits == 64) {
      if (SrcScalarBits == 64)
    // Only 64 bit vector conversions are natively supported before arch13.
    if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
      if (SrcScalarBits == DstScalarBits)
        return NumDstVectors;

      if (SrcScalarBits == 1)
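
To make the getCastInstrCost change concrete, a small hypothetical IR example: with the vector-enhancements facility 2, a same-width vector fp/int conversion is assumed to map to a single vector instruction per 128-bit part, so the returned cost is NumDstVectors instead of a scalarized estimate (the fp-cast cost-model test further below checks exactly this case):

; Hypothetical example: SrcScalarBits == DstScalarBits == 32, one vector register.
define <4 x i32> @cast_cost_sketch(<4 x float> %v) {
  %r = fptosi <4 x float> %v to <4 x i32>   ; cost 12 on z13, 1 on arch13 per the test below
  ret <4 x i32> %r
}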

@@ -856,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    case Instruction::Select:
      if (ValTy->isFloatingPointTy())
        return 4; // No load on condition for FP - costs a conditional jump.
      return 1; // Load On Condition.
      return 1; // Load On Condition / Select Register.
    }
  }

@@ -1009,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
      (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));

  // Store/Load reversed saves one instruction.
  if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
  if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
      I != nullptr) {
    if (Opcode == Instruction::Load && I->hasOneUse()) {
      const Instruction *LdUser = cast<Instruction>(*I->user_begin());
      // In case of load -> bswap -> store, return normal cost for the load.
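
Likewise for getMemoryOpCost, a hypothetical sketch of the load/store-plus-bswap pattern that the vector-enhancements facility 2 lets the backend fold into byte-reversing vector memory instructions, which is why the memory operation itself is now costed as free (the bswap cost-model test below covers these cases):

declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; Hypothetical example: the load feeds only a bswap and the bswapped value is
; stored, so both memory operations are modeled with cost 0 on arch13.
define void @bswap_mem_sketch(<2 x i64>* %src, <2 x i64>* %dst) {
  %l = load <2 x i64>, <2 x i64>* %src
  %swapped = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %l)
  store <2 x i64> %swapped, <2 x i64>* %dst
  ret void
}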

@@ -1,4 +1,7 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,AR13
;
; Note: The scalarized vector instructions costs are not including any
; extracts, due to the undef operands.
@ -114,7 +117,8 @@ define void @fptosi() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptosi <2 x double> undef to <2 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptosi <2 x double> undef to <2 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptosi <2 x float> undef to <2 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptosi <2 x float> undef to <2 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptosi <2 x float> undef to <2 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptosi <4 x fp128> undef to <4 x i64>
|
||||
|
@ -126,7 +130,8 @@ define void @fptosi() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptosi <4 x double> undef to <4 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptosi <4 x double> undef to <4 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptosi <4 x float> undef to <4 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptosi <4 x float> undef to <4 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptosi <4 x float> undef to <4 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptosi <8 x fp128> undef to <8 x i64>
|
||||
|
@ -138,7 +143,8 @@ define void @fptosi() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptosi <8 x double> undef to <8 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptosi <8 x double> undef to <8 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptosi <8 x float> undef to <8 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptosi <8 x float> undef to <8 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptosi <8 x float> undef to <8 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptosi <16 x double> undef to <16 x i64>
|
||||
|
@ -146,7 +152,8 @@ define void @fptosi() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptosi <16 x double> undef to <16 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptosi <16 x double> undef to <16 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptosi <16 x float> undef to <16 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptosi <16 x float> undef to <16 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptosi <16 x float> undef to <16 x i8>
|
||||
|
||||
|
@ -233,7 +240,8 @@ define void @fptoui() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptoui <2 x double> undef to <2 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptoui <2 x double> undef to <2 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptoui <2 x float> undef to <2 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptoui <2 x float> undef to <2 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptoui <2 x float> undef to <2 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptoui <4 x fp128> undef to <4 x i64>
|
||||
|
@ -245,7 +253,8 @@ define void @fptoui() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptoui <4 x double> undef to <4 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptoui <4 x double> undef to <4 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptoui <4 x float> undef to <4 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptoui <4 x float> undef to <4 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptoui <4 x float> undef to <4 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptoui <8 x fp128> undef to <8 x i64>
|
||||
|
@ -257,7 +266,8 @@ define void @fptoui() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptoui <8 x double> undef to <8 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptoui <8 x double> undef to <8 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptoui <8 x float> undef to <8 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptoui <8 x float> undef to <8 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptoui <8 x float> undef to <8 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptoui <16 x double> undef to <16 x i64>
|
||||
|
@ -265,7 +275,8 @@ define void @fptoui() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptoui <16 x double> undef to <16 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptoui <16 x double> undef to <16 x i8>
|
||||
; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptoui <16 x float> undef to <16 x i64>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
|
||||
; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
|
||||
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptoui <16 x float> undef to <16 x i16>
|
||||
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptoui <16 x float> undef to <16 x i8>
|
||||
|
||||
|
@ -379,7 +390,8 @@ define void @sitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = sitofp <2 x i64> undef to <2 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = sitofp <2 x i32> undef to <2 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = sitofp <2 x i32> undef to <2 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = sitofp <2 x i16> undef to <2 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = sitofp <2 x i16> undef to <2 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = sitofp <2 x i16> undef to <2 x float>
|
||||
|
@ -391,7 +403,8 @@ define void @sitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = sitofp <4 x i64> undef to <4 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = sitofp <4 x i32> undef to <4 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = sitofp <4 x i32> undef to <4 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = sitofp <4 x i16> undef to <4 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = sitofp <4 x i16> undef to <4 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = sitofp <4 x i16> undef to <4 x float>
|
||||
|
@ -403,7 +416,8 @@ define void @sitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = sitofp <8 x i64> undef to <8 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = sitofp <8 x i32> undef to <8 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = sitofp <8 x i32> undef to <8 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = sitofp <8 x i16> undef to <8 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = sitofp <8 x i16> undef to <8 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = sitofp <8 x i16> undef to <8 x float>
|
||||
|
@ -413,7 +427,8 @@ define void @sitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = sitofp <16 x i64> undef to <16 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = sitofp <16 x i64> undef to <16 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = sitofp <16 x i32> undef to <16 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = sitofp <16 x i16> undef to <16 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = sitofp <16 x i16> undef to <16 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = sitofp <16 x i8> undef to <16 x double>
|
||||
|
@ -497,7 +512,8 @@ define void @uitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = uitofp <2 x i64> undef to <2 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = uitofp <2 x i32> undef to <2 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = uitofp <2 x i32> undef to <2 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = uitofp <2 x i16> undef to <2 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = uitofp <2 x i16> undef to <2 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = uitofp <2 x i16> undef to <2 x float>
|
||||
|
@ -509,7 +525,8 @@ define void @uitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = uitofp <4 x i64> undef to <4 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = uitofp <4 x i32> undef to <4 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = uitofp <4 x i32> undef to <4 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = uitofp <4 x i16> undef to <4 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = uitofp <4 x i16> undef to <4 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = uitofp <4 x i16> undef to <4 x float>
|
||||
|
@ -521,7 +538,8 @@ define void @uitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = uitofp <8 x i64> undef to <8 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = uitofp <8 x i32> undef to <8 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = uitofp <8 x i32> undef to <8 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = uitofp <8 x i16> undef to <8 x fp128>
|
||||
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = uitofp <8 x i16> undef to <8 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = uitofp <8 x i16> undef to <8 x float>
|
||||
|
@ -531,7 +549,8 @@ define void @uitofp() {
|
|||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = uitofp <16 x i64> undef to <16 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = uitofp <16 x i64> undef to <16 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = uitofp <16 x i32> undef to <16 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
|
||||
; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
|
||||
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = uitofp <16 x i16> undef to <16 x double>
|
||||
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = uitofp <16 x i16> undef to <16 x float>
|
||||
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = uitofp <16 x i8> undef to <16 x double>
|
||||
|
|
|

@@ -1,4 +1,7 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,AR13

define void @bswap_i64(i64 %arg, <2 x i64> %arg2) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64':
|
||||
|
@ -63,6 +66,32 @@ define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_v2i64_mem(<2 x i64>* %src, <2 x i64> %arg, <2 x i64>* %dst) {
|
||||
; CHECK:Printing analysis 'Cost Model Analysis' for function 'bswap_v2i64_mem':
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, <2 x i64>* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
|
||||
|
||||
%Ld1 = load <2 x i64>, <2 x i64>* %src
|
||||
%swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
|
||||
|
||||
%swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
|
||||
store <2 x i64> %swp2, <2 x i64>* %dst
|
||||
|
||||
%Ld2 = load <2 x i64>, <2 x i64>* %src
|
||||
%swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
|
||||
store <2 x i64> %swp3, <2 x i64>* %dst
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem':
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src
|
||||
|
@ -85,6 +114,31 @@ define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_v4i32_mem(<4 x i32>* %src, <4 x i32> %arg, <4 x i32>* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v4i32_mem':
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, <4 x i32>* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
|
||||
%Ld1 = load <4 x i32>, <4 x i32>* %src
|
||||
%swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
|
||||
|
||||
%swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
|
||||
store <4 x i32> %swp2, <4 x i32>* %dst
|
||||
|
||||
%Ld2 = load <4 x i32>, <4 x i32>* %src
|
||||
%swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
|
||||
store <4 x i32> %swp3, <4 x i32>* %dst
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem':
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src
|
||||
|
@ -107,6 +161,30 @@ define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @bswap_v8i16_mem(<8 x i16>* %src, <8 x i16> %arg, <8 x i16>* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v8i16_mem':
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, <8 x i16>* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
|
||||
%Ld1 = load <8 x i16>, <8 x i16>* %src
|
||||
%swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
|
||||
|
||||
%swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
|
||||
store <8 x i16> %swp2, <8 x i16>* %dst
|
||||
|
||||
%Ld2 = load <8 x i16>, <8 x i16>* %src
|
||||
%swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
|
||||
store <8 x i16> %swp3, <8 x i16>* %dst
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
|
|

@@ -0,0 +1,97 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,AR13

define void @fun0(i32 %a) {
|
||||
; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun0':
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i32 %l0, -1
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i32 %a, %c0
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i32 %l1, -1
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i32 %a, %c1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i32 %l2, %a
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 %c2, -1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i32 %l3, %a
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i32 %c3, -1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i32 %l4, %a
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i32 %c4, -1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1
|
||||
|
||||
entry:
|
||||
%l0 = load i32, i32* undef
|
||||
%c0 = xor i32 %l0, -1
|
||||
%res0 = or i32 %a, %c0
|
||||
store i32 %res0, i32* undef
|
||||
|
||||
%l1 = load i32, i32* undef
|
||||
%c1 = xor i32 %l1, -1
|
||||
%res1 = and i32 %a, %c1
|
||||
store i32 %res1, i32* undef
|
||||
|
||||
%l2 = load i32, i32* undef
|
||||
%c2 = and i32 %l2, %a
|
||||
%res2 = xor i32 %c2, -1
|
||||
store i32 %res2, i32* undef
|
||||
|
||||
%l3 = load i32, i32* undef
|
||||
%c3 = or i32 %l3, %a
|
||||
%res3 = xor i32 %c3, -1
|
||||
store i32 %res3, i32* undef
|
||||
|
||||
%l4 = load i32, i32* undef
|
||||
%c4 = xor i32 %l4, %a
|
||||
%res4 = xor i32 %c4, -1
|
||||
store i32 %res4, i32* undef
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fun1(i64 %a) {
|
||||
; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun1':
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i64 %l0, -1
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i64 %a, %c0
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i64 %l1, -1
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i64 %a, %c1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i64 %l2, %a
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i64 %c2, -1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i64 %l3, %a
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 %c3, -1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i64 %l4, %a
|
||||
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1
|
||||
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1
|
||||
entry:
|
||||
%l0 = load i64, i64* undef
|
||||
%c0 = xor i64 %l0, -1
|
||||
%res0 = or i64 %a, %c0
|
||||
store i64 %res0, i64* undef
|
||||
|
||||
%l1 = load i64, i64* undef
|
||||
%c1 = xor i64 %l1, -1
|
||||
%res1 = and i64 %a, %c1
|
||||
store i64 %res1, i64* undef
|
||||
|
||||
%l2 = load i64, i64* undef
|
||||
%c2 = and i64 %l2, %a
|
||||
%res2 = xor i64 %c2, -1
|
||||
store i64 %res2, i64* undef
|
||||
|
||||
%l3 = load i64, i64* undef
|
||||
%c3 = or i64 %l3, %a
|
||||
%res3 = xor i64 %c3, -1
|
||||
store i64 %res3, i64* undef
|
||||
|
||||
%l4 = load i64, i64* undef
|
||||
%c4 = xor i64 %l4, %a
|
||||
%res4 = xor i64 %c4, -1
|
||||
store i64 %res4, i64* undef
|
||||
|
||||
ret void
|
||||
}
|
|

@@ -5,6 +5,9 @@
; Run the test again to make sure it still works the same even
; in the presence of the load-store-on-condition-2 facility.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
;
; And again in the presence of the select instructions.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s

; Test LOCR.
define i32 @f1(i32 %a, i32 %b, i32 %limit) {

@@ -1,6 +1,11 @@
; Test LOCHI and LOCGHI.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
;
; Run the test again to make sure it still works the same even
; in the presence of the select instructions.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s

define i32 @f1(i32 %x) {
; CHECK-LABEL: f1:

@@ -3,31 +3,36 @@
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN:   -no-integrated-as | FileCheck %s
;
; Run the test again to make sure it still works the same even
; in the presence of the select instructions.
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \
; RUN:   -no-integrated-as | FileCheck %s

define void @f1(i32 %limit) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clfi %r2, 42
|
||||
; CHECK: locfhrl [[REG2]], [[REG1]]
|
||||
; CHECK: stepc [[REG2]]
|
||||
; CHECK: locfhrhe [[REG1]], [[REG2]]
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK: br %r14
|
||||
%a = call i32 asm sideeffect "stepa $0", "=h"()
|
||||
%b = call i32 asm sideeffect "stepb $0", "=h"()
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We should commute the LOCRMux to save one move.
|
||||
define void @f2(i32 %limit) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clijhe %r2, 42,
|
||||
; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
|
||||
; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]]
|
||||
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
|
||||
; CHECK: [[LABEL]]
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK: br %r14
|
||||
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
|
||||
|
@ -37,16 +42,18 @@ define void @f2(i32 %limit) {
|
|||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
|
||||
call void asm sideeffect "use $0", "r"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @f3(i32 %limit) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: clijhe %r2, 42,
|
||||
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]]
|
||||
; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
|
||||
; CHECK: [[LABEL]]
|
||||
; CHECK: stepc [[REG2]]
|
||||
; CHECK: br %r14
|
||||
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
|
||||
%a = call i32 asm sideeffect "stepa $0", "=r"()
|
||||
|
@ -55,17 +62,17 @@ define void @f3(i32 %limit) {
|
|||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
|
||||
call void asm sideeffect "use $0", "r"(i32 %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: We should commute the LOCRMux to save one move.
|
||||
define void @f4(i32 %limit) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clijhe %r2, 42,
|
||||
; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
|
||||
; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]]
|
||||
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
|
||||
; CHECK: [[LABEL]]
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK: br %r14
|
||||
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
|
||||
|
@ -75,6 +82,7 @@ define void @f4(i32 %limit) {
|
|||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
call void asm sideeffect "stepc $0", "r"(i32 %res)
|
||||
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -82,8 +90,9 @@ define void @f5(i32 %limit) {
|
|||
; CHECK-LABEL: f5:
|
||||
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: clijhe %r2, 42,
|
||||
; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]]
|
||||
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
|
||||
; CHECK: [[LABEL]]
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK: br %r14
|
||||
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
|
||||
|
@ -102,8 +111,8 @@ define void @f6(i32 %limit) {
|
|||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clfi %r2, 41
|
||||
; CHECK: locfhrle [[REG2]], [[REG1]]
|
||||
; CHECK: stepc [[REG2]]
|
||||
; CHECK: locfhrh [[REG1]], [[REG2]]
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%a = call i32 asm sideeffect "stepa $0", "=h"()
|
||||
|
@ -117,6 +126,7 @@ if.then:
|
|||
return:
|
||||
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -126,8 +136,8 @@ define void @f7(i32 %limit) {
|
|||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clfi %r2, 41
|
||||
; CHECK: locfhrh [[REG2]], [[REG1]]
|
||||
; CHECK: stepc [[REG2]]
|
||||
; CHECK: locfhrle [[REG1]], [[REG2]]
|
||||
; CHECK: stepc [[REG1]]
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%a = call i32 asm sideeffect "stepa $0", "=h"()
|
||||
|
@ -141,6 +151,7 @@ if.then:
|
|||
return:
|
||||
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|

@@ -0,0 +1,121 @@
; Test SELR and SELGR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s

; Test SELR.
define i32 @f1(i32 %limit, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: clfi %r2, 42
|
||||
; CHECK: selrl %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; Test SELGR.
|
||||
define i64 @f2(i64 %limit, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: clgfi %r2, 42
|
||||
; CHECK: selgrl %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%cond = icmp ult i64 %limit, 42
|
||||
%res = select i1 %cond, i64 %a, i64 %b
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
; Test SELR in a case that could use COMPARE AND BRANCH. We prefer using
|
||||
; SELR if possible.
|
||||
define i32 @f3(i32 %limit, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: chi %r2, 42
|
||||
; CHECK: selre %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%cond = icmp eq i32 %limit, 42
|
||||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ...and again for SELGR.
|
||||
define i64 @f4(i64 %limit, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: cghi %r2, 42
|
||||
; CHECK: selgre %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%cond = icmp eq i64 %limit, 42
|
||||
%res = select i1 %cond, i64 %a, i64 %b
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
; Check that we also get SELR as a result of early if-conversion.
|
||||
define i32 @f5(i32 %limit, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: clfi %r2, 41
|
||||
; CHECK: selrh %r2, %r4, %r3
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
br i1 %cond, label %if.then, label %return
|
||||
|
||||
if.then:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ... and likewise for SELGR.
|
||||
define i64 @f6(i64 %limit, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: clgfi %r2, 41
|
||||
; CHECK: selgrh %r2, %r4, %r3
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cond = icmp ult i64 %limit, 42
|
||||
br i1 %cond, label %if.then, label %return
|
||||
|
||||
if.then:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%res = phi i64 [ %a, %if.then ], [ %b, %entry ]
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
; Check that inverting the condition works as well.
|
||||
define i32 @f7(i32 %limit, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: clfi %r2, 41
|
||||
; CHECK: selrh %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
br i1 %cond, label %if.then, label %return
|
||||
|
||||
if.then:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; ... and likewise for SELGR.
|
||||
define i64 @f8(i64 %limit, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: clgfi %r2, 41
|
||||
; CHECK: selgrh %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%cond = icmp ult i64 %limit, 42
|
||||
br i1 %cond, label %if.then, label %return
|
||||
|
||||
if.then:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%res = phi i64 [ %b, %if.then ], [ %a, %entry ]
|
||||
ret i64 %res
|
||||
}
|
||||
|
|

@@ -0,0 +1,76 @@
; Test SELFHR.
; See comments in asm-18.ll about testing high-word operations.
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \
; RUN:   -no-integrated-as | FileCheck %s

define void @f1(i32 %limit) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clfi %r2, 42
|
||||
; CHECK: selfhrl [[REG3:%r[0-5]]], [[REG1]], [[REG2]]
|
||||
; CHECK: stepc [[REG3]]
|
||||
; CHECK: br %r14
|
||||
%a = call i32 asm sideeffect "stepa $0", "=h"()
|
||||
%b = call i32 asm sideeffect "stepb $0", "=h"()
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
%res = select i1 %cond, i32 %a, i32 %b
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "use $0", "h"(i32 %a)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that we also get SELFHR as a result of early if-conversion.
|
||||
define void @f2(i32 %limit) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clfi %r2, 41
|
||||
; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG2]], [[REG1]]
|
||||
; CHECK: stepc [[REG3]]
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%a = call i32 asm sideeffect "stepa $0", "=h"()
|
||||
%b = call i32 asm sideeffect "stepb $0", "=h"()
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
br i1 %cond, label %if.then, label %return
|
||||
|
||||
if.then:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "use $0", "h"(i32 %a)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that inverting the condition works as well.
|
||||
define void @f3(i32 %limit) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
|
||||
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
|
||||
; CHECK-DAG: clfi %r2, 41
|
||||
; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG1]], [[REG2]]
|
||||
; CHECK: stepc [[REG3]]
|
||||
; CHECK: br %r14
|
||||
entry:
|
||||
%a = call i32 asm sideeffect "stepa $0", "=h"()
|
||||
%b = call i32 asm sideeffect "stepb $0", "=h"()
|
||||
%cond = icmp ult i32 %limit, 42
|
||||
br i1 %cond, label %if.then, label %return
|
||||
|
||||
if.then:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
|
||||
call void asm sideeffect "stepc $0", "h"(i32 %res)
|
||||
call void asm sideeffect "use $0", "h"(i32 %a)
|
||||
call void asm sideeffect "use $0", "h"(i32 %b)
|
||||
ret void
|
||||
}
|
||||
|
|

@@ -0,0 +1,179 @@
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=arch13 -start-before=greedy %s -o - \
# RUN:   | FileCheck %s
#
# Test that regalloc manages (via regalloc hints) to avoid a LOCRMux jump
# sequence expansion, and a SELR instruction is emitted.

--- |
|
||||
; ModuleID = 'tc.ll'
|
||||
source_filename = "tc.ll"
|
||||
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
|
||||
|
||||
@globvar = external global i32
|
||||
|
||||
declare void @fun() #0
|
||||
|
||||
define void @fun1() #0 {
|
||||
bb5:
|
||||
br label %bb6
|
||||
|
||||
bb6: ; preds = %bb33, %bb5
|
||||
%tmp = phi i1 [ %tmp34, %bb33 ], [ undef, %bb5 ]
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb7, %bb6
|
||||
%lsr.iv1 = phi [512 x i32]* [ %0, %bb7 ], [ undef, %bb6 ]
|
||||
%tmp8 = phi i32 [ %tmp27, %bb7 ], [ -1000000, %bb6 ]
|
||||
%tmp9 = phi i64 [ %tmp28, %bb7 ], [ 0, %bb6 ]
|
||||
%lsr3 = trunc i64 %tmp9 to i32
|
||||
%lsr.iv12 = bitcast [512 x i32]* %lsr.iv1 to i32*
|
||||
%tmp11 = load i32, i32* %lsr.iv12
|
||||
%tmp12 = icmp sgt i32 %tmp11, undef
|
||||
%tmp13 = trunc i64 %tmp9 to i32
|
||||
%tmp14 = select i1 %tmp12, i32 %lsr3, i32 0
|
||||
%tmp15 = select i1 %tmp12, i32 %tmp13, i32 %tmp8
|
||||
%tmp16 = load i32, i32* undef
|
||||
%tmp17 = select i1 false, i32 undef, i32 %tmp14
|
||||
%tmp18 = select i1 false, i32 undef, i32 %tmp15
|
||||
%tmp19 = select i1 false, i32 %tmp16, i32 undef
|
||||
%tmp20 = select i1 undef, i32 undef, i32 %tmp17
|
||||
%tmp21 = select i1 undef, i32 undef, i32 %tmp18
|
||||
%tmp22 = select i1 undef, i32 undef, i32 %tmp19
|
||||
%tmp23 = or i64 %tmp9, 3
|
||||
%tmp24 = icmp sgt i32 undef, %tmp22
|
||||
%tmp25 = trunc i64 %tmp23 to i32
|
||||
%tmp26 = select i1 %tmp24, i32 %tmp25, i32 %tmp20
|
||||
%tmp27 = select i1 %tmp24, i32 %tmp25, i32 %tmp21
|
||||
%tmp28 = add nuw nsw i64 %tmp9, 4
|
||||
%tmp29 = icmp eq i64 undef, 0
|
||||
%scevgep = getelementptr [512 x i32], [512 x i32]* %lsr.iv1, i64 0, i64 4
|
||||
%0 = bitcast i32* %scevgep to [512 x i32]*
|
||||
br i1 %tmp29, label %bb30, label %bb7
|
||||
|
||||
bb30: ; preds = %bb7
|
||||
%tmp32 = icmp sgt i32 %tmp27, -1000000
|
||||
br i1 %tmp32, label %bb33, label %bb35
|
||||
|
||||
bb33: ; preds = %bb30
|
||||
call void @fun()
|
||||
store i32 %tmp26, i32* @globvar
|
||||
%tmp34 = icmp ugt i32 undef, 1
|
||||
br label %bb6
|
||||
|
||||
bb35: ; preds = %bb30
|
||||
br i1 %tmp, label %bb37, label %bb38
|
||||
|
||||
bb37: ; preds = %bb35
|
||||
unreachable
|
||||
|
||||
bb38: ; preds = %bb35
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #1
|
||||
|
||||
attributes #0 = { "target-cpu"="arch13" }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
...
|
||||
|
||||
# CHECK: selr
|
||||
# CHECK-NOT: risblg
|
||||
|
||||
---
|
||||
name: fun1
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: grx32bit }
|
||||
- { id: 1, class: addr64bit }
|
||||
- { id: 2, class: grx32bit }
|
||||
- { id: 3, class: addr64bit }
|
||||
- { id: 4, class: gr32bit }
|
||||
- { id: 5, class: grx32bit }
|
||||
- { id: 6, class: gr64bit }
|
||||
- { id: 7, class: gr64bit }
|
||||
- { id: 8, class: grx32bit }
|
||||
- { id: 9, class: grx32bit }
|
||||
- { id: 10, class: gr64bit }
|
||||
- { id: 11, class: grx32bit }
|
||||
- { id: 12, class: gr64bit }
|
||||
- { id: 13, class: grx32bit }
|
||||
- { id: 14, class: gr32bit }
|
||||
- { id: 15, class: gr32bit }
|
||||
- { id: 16, class: grx32bit }
|
||||
- { id: 17, class: grx32bit }
|
||||
- { id: 18, class: gr32bit }
|
||||
- { id: 19, class: addr64bit }
|
||||
- { id: 20, class: grx32bit }
|
||||
- { id: 21, class: gr32bit }
|
||||
- { id: 22, class: gr64bit }
|
||||
- { id: 23, class: grx32bit }
|
||||
- { id: 24, class: grx32bit }
|
||||
- { id: 25, class: grx32bit }
|
||||
- { id: 26, class: addr64bit }
|
||||
- { id: 27, class: grx32bit }
|
||||
- { id: 28, class: addr64bit }
|
||||
frameInfo:
|
||||
hasCalls: true
|
||||
body: |
|
||||
bb.0.bb5:
|
||||
%25:grx32bit = IMPLICIT_DEF
|
||||
|
||||
bb.1.bb6:
|
||||
%28:addr64bit = LGHI 0
|
||||
%27:grx32bit = IIFMux 4293967296
|
||||
%26:addr64bit = IMPLICIT_DEF
|
||||
|
||||
bb.2.bb7:
|
||||
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
|
||||
|
||||
%14:gr32bit = LMux %26, 0, $noreg :: (load 4 from %ir.lsr.iv12)
|
||||
CR %14, undef %15:gr32bit, implicit-def $cc
|
||||
%16:grx32bit = COPY %28.subreg_l32
|
||||
%16:grx32bit = LOCHIMux %16, 0, 14, 12, implicit $cc
|
||||
%17:grx32bit = SELRMux %27, %28.subreg_l32, 14, 2, implicit killed $cc
|
||||
%18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`)
|
||||
%20:grx32bit = COPY %28.subreg_l32
|
||||
%20:grx32bit = OILMux %20, 3, implicit-def dead $cc
|
||||
CR undef %21:gr32bit, %18, implicit-def $cc
|
||||
%4:gr32bit = SELRMux %16, %20, 14, 2, implicit $cc
|
||||
%27:grx32bit = SELRMux %17, %20, 14, 2, implicit killed $cc
|
||||
%28:addr64bit = nuw nsw LA %28, 4, $noreg
|
||||
%26:addr64bit = LA %26, 16, $noreg
|
||||
CGHI undef %22:gr64bit, 0, implicit-def $cc
|
||||
BRC 14, 6, %bb.2, implicit killed $cc
|
||||
J %bb.3
|
||||
|
||||
bb.3.bb30:
|
||||
successors: %bb.4(0x7fffffff), %bb.5(0x00000001)
|
||||
|
||||
CFIMux %27, -999999, implicit-def $cc
|
||||
BRC 14, 4, %bb.5, implicit killed $cc
|
||||
J %bb.4
|
||||
|
||||
bb.4.bb33:
|
||||
ADJCALLSTACKDOWN 0, 0
|
||||
CallBRASL @fun, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc
|
||||
ADJCALLSTACKUP 0, 0
|
||||
STRL %4, @globvar :: (store 4 into @globvar)
|
||||
CLFIMux undef %23:grx32bit, 1, implicit-def $cc
|
||||
%25:grx32bit = LHIMux 0
|
||||
%25:grx32bit = LOCHIMux %25, 1, 14, 2, implicit killed $cc
|
||||
J %bb.1
|
||||
|
||||
bb.5.bb35:
|
||||
successors: %bb.6, %bb.7
|
||||
|
||||
TMLMux %25, 1, implicit-def $cc
|
||||
BRC 15, 8, %bb.7, implicit killed $cc
|
||||
J %bb.6
|
||||
|
||||
bb.6.bb37:
|
||||
successors:
|
||||
|
||||
|
||||
bb.7.bb38:
|
||||
|
||||
...
|
|
@@ -0,0 +1,74 @@
|
|||
; Test population-count instruction on arch13
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32 %a)
|
||||
declare i64 @llvm.ctpop.i64(i64 %a)
|
||||
|
||||
define i32 @f1(i32 %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: llgfr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0, 8
|
||||
; CHECK: br %r14
|
||||
|
||||
%popcnt = call i32 @llvm.ctpop.i32(i32 %a)
|
||||
ret i32 %popcnt
|
||||
}
|
||||
|
||||
define i32 @f2(i32 %a) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: llghr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0, 8
|
||||
; CHECK: br %r14
|
||||
%and = and i32 %a, 65535
|
||||
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
|
||||
ret i32 %popcnt
|
||||
}
|
||||
|
||||
define i32 @f3(i32 %a) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: llgcr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0, 8
|
||||
; CHECK: br %r14
|
||||
%and = and i32 %a, 255
|
||||
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
|
||||
ret i32 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f4(i64 %a) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: popcnt %r2, %r2, 8
|
||||
; CHECK: br %r14
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %a)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f5(i64 %a) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: llgfr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
|
||||
%and = and i64 %a, 4294967295
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f6(i64 %a) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: llghr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0, 8
|
||||
; CHECK: br %r14
|
||||
%and = and i64 %a, 65535
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
||||
define i64 @f7(i64 %a) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: llgcr %r0, %r2
|
||||
; CHECK: popcnt %r2, %r0, 8
|
||||
; CHECK: br %r14
|
||||
%and = and i64 %a, 255
|
||||
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
|
||||
ret i64 %popcnt
|
||||
}
|
||||
|
|
@@ -0,0 +1,126 @@
|
|||
; Combined logical operations involving complement on arch13
|
||||
;
|
||||
; RUN: llc -mcpu=arch13 < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||
|
||||
; And-with-complement 32-bit.
|
||||
define i32 @f1(i32 %dummy, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: ncrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%neg = xor i32 %b, -1
|
||||
%ret = and i32 %neg, %a
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; And-with-complement 64-bit.
|
||||
define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: ncgrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%neg = xor i64 %b, -1
|
||||
%ret = and i64 %neg, %a
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; Or-with-complement 32-bit.
|
||||
define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: ocrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%neg = xor i32 %b, -1
|
||||
%ret = or i32 %neg, %a
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Or-with-complement 64-bit.
|
||||
define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: ocgrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%neg = xor i64 %b, -1
|
||||
%ret = or i64 %neg, %a
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; NAND 32-bit.
|
||||
define i32 @f5(i32 %dummy, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: nnrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%tmp = and i32 %a, %b
|
||||
%ret = xor i32 %tmp, -1
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; NAND 64-bit.
|
||||
define i64 @f6(i64 %dummy, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: nngrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%tmp = and i64 %a, %b
|
||||
%ret = xor i64 %tmp, -1
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; NOR 32-bit.
|
||||
define i32 @f7(i32 %dummy, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: nork %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%tmp = or i32 %a, %b
|
||||
%ret = xor i32 %tmp, -1
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; NOR 64-bit.
|
||||
define i64 @f8(i64 %dummy, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: nogrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%tmp = or i64 %a, %b
|
||||
%ret = xor i64 %tmp, -1
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; NXOR 32-bit.
|
||||
define i32 @f9(i32 %dummy, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: nxrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%tmp = xor i32 %a, %b
|
||||
%ret = xor i32 %tmp, -1
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; NXOR 64-bit.
|
||||
define i64 @f10(i64 %dummy, i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: nxgrk %r2, %r3, %r4
|
||||
; CHECK: br %r14
|
||||
%tmp = xor i64 %a, %b
|
||||
%ret = xor i64 %tmp, -1
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
; Or-with-complement 32-bit of a constant.
|
||||
define i32 @f11(i32 %a) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: lhi [[REG:%r[0-5]]], -256
|
||||
; CHECK: ocrk %r2, [[REG]], %r2
|
||||
; CHECK: br %r14
|
||||
%neg = xor i32 %a, -1
|
||||
%ret = or i32 %neg, -256
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Or-with-complement 64-bit of a constant.
|
||||
define i64 @f12(i64 %a) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: lghi [[REG:%r[0-5]]], -256
|
||||
; CHECK: ocgrk %r2, [[REG]], %r2
|
||||
; CHECK: br %r14
|
||||
%neg = xor i64 %a, -1
|
||||
%ret = or i64 %neg, -256
|
||||
ret i64 %ret
|
||||
}
|
||||
|
|
@@ -0,0 +1,97 @@
|
|||
; Test loads of byte-swapped vector elements.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test v8i16 loads.
|
||||
define <8 x i16> @f1(<8 x i16> *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vlbrh %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <8 x i16>, <8 x i16> *%ptr
|
||||
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %load)
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 loads.
|
||||
define <4 x i32> @f2(<4 x i32> *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vlbrf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 loads.
|
||||
define <2 x i64> @f3(<2 x i64> *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vlbrg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <2 x i64>, <2 x i64> *%ptr
|
||||
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %load)
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test the highest aligned in-range offset.
|
||||
define <4 x i32> @f4(<4 x i32> *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vlbrf %v24, 4080(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test the highest unaligned in-range offset.
|
||||
define <4 x i32> @f5(i8 *%base) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vlbrf %v24, 4095(%r2)
|
||||
; CHECK: br %r14
|
||||
%addr = getelementptr i8, i8 *%base, i64 4095
|
||||
%ptr = bitcast i8 *%addr to <4 x i32> *
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test the next offset up, which requires separate address logic.
|
||||
define <4 x i32> @f6(<4 x i32> *%base) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlbrf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test negative offsets, which also require separate address logic.
|
||||
define <4 x i32> @f7(<4 x i32> *%base) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: aghi %r2, -16
|
||||
; CHECK: vlbrf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Check that indexes are allowed.
|
||||
define <4 x i32> @f8(i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vlbrf %v24, 0(%r3,%r2)
|
||||
; CHECK: br %r14
|
||||
%addr = getelementptr i8, i8 *%base, i64 %index
|
||||
%ptr = bitcast i8 *%addr to <4 x i32> *
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
|
@@ -0,0 +1,97 @@
|
|||
; Test stores of byte-swapped vector elements.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test v8i16 stores.
|
||||
define void @f1(<8 x i16> %val, <8 x i16> *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vstbrh %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
|
||||
store <8 x i16> %swap, <8 x i16> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 stores.
|
||||
define void @f2(<4 x i32> %val, <4 x i32> *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vstbrf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
store <4 x i32> %swap, <4 x i32> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 stores.
|
||||
define void @f3(<2 x i64> %val, <2 x i64> *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vstbrg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
|
||||
store <2 x i64> %swap, <2 x i64> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test the highest aligned in-range offset.
|
||||
define void @f4(<4 x i32> %val, <4 x i32> *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vstbrf %v24, 4080(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
store <4 x i32> %swap, <4 x i32> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test the highest unaligned in-range offset.
|
||||
define void @f5(<4 x i32> %val, i8 *%base) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vstbrf %v24, 4095(%r2)
|
||||
; CHECK: br %r14
|
||||
%addr = getelementptr i8, i8 *%base, i64 4095
|
||||
%ptr = bitcast i8 *%addr to <4 x i32> *
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
store <4 x i32> %swap, <4 x i32> *%ptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test the next offset up, which requires separate address logic.
|
||||
define void @f6(<4 x i32> %val, <4 x i32> *%base) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vstbrf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
store <4 x i32> %swap, <4 x i32> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test negative offsets, which also require separate address logic.
|
||||
define void @f7(<4 x i32> %val, <4 x i32> *%base) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: aghi %r2, -16
|
||||
; CHECK: vstbrf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
store <4 x i32> %swap, <4 x i32> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that indexes are allowed.
|
||||
define void @f8(<4 x i32> %val, i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vstbrf %v24, 0(%r3,%r2)
|
||||
; CHECK: br %r14
|
||||
%addr = getelementptr i8, i8 *%base, i64 %index
|
||||
%ptr = bitcast i8 *%addr to <4 x i32> *
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
store <4 x i32> %swap, <4 x i32> *%ptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
|
@@ -0,0 +1,220 @@
|
|||
; Test vector insertion of byte-swapped memory values.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test v8i16 insertion into the first element.
|
||||
define <8 x i16> @f1(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vlebrh %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
%ret = insertelement <8 x i16> %val, i16 %swap, i32 0
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 insertion into the last element.
|
||||
define <8 x i16> @f2(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vlebrh %v24, 0(%r2), 7
|
||||
; CHECK: br %r14
|
||||
%element = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
%ret = insertelement <8 x i16> %val, i16 %swap, i32 7
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 insertion with the highest in-range offset.
|
||||
define <8 x i16> @f3(<8 x i16> %val, i16 *%base) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vlebrh %v24, 4094(%r2), 5
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i32 2047
|
||||
%element = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
%ret = insertelement <8 x i16> %val, i16 %swap, i32 5
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 insertion with the first out-of-range offset.
|
||||
define <8 x i16> @f4(<8 x i16> %val, i16 *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlebrh %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i32 2048
|
||||
%element = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
%ret = insertelement <8 x i16> %val, i16 %swap, i32 1
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 insertion into a variable element.
|
||||
define <8 x i16> @f5(<8 x i16> %val, i16 *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK-NOT: vlebrh
|
||||
; CHECK: br %r14
|
||||
%element = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
%ret = insertelement <8 x i16> %val, i16 %swap, i32 %index
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 insertion using a pair of vector bswaps.
|
||||
define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vlebrh %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load i16, i16 *%ptr
|
||||
%swapval = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
|
||||
%insert = insertelement <8 x i16> %swapval, i16 %element, i32 0
|
||||
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion into the first element.
|
||||
define <4 x i32> @f7(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: vlebrf %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
%ret = insertelement <4 x i32> %val, i32 %swap, i32 0
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion into the last element.
|
||||
define <4 x i32> @f8(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vlebrf %v24, 0(%r2), 3
|
||||
; CHECK: br %r14
|
||||
%element = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
%ret = insertelement <4 x i32> %val, i32 %swap, i32 3
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion with the highest in-range offset.
|
||||
define <4 x i32> @f9(<4 x i32> %val, i32 *%base) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vlebrf %v24, 4092(%r2), 2
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i32, i32 *%base, i32 1023
|
||||
%element = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
%ret = insertelement <4 x i32> %val, i32 %swap, i32 2
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion with the first out-of-range offset.
|
||||
define <4 x i32> @f10(<4 x i32> %val, i32 *%base) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlebrf %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i32, i32 *%base, i32 1024
|
||||
%element = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
%ret = insertelement <4 x i32> %val, i32 %swap, i32 1
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion into a variable element.
|
||||
define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK-NOT: vlebrf
|
||||
; CHECK: br %r14
|
||||
%element = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
%ret = insertelement <4 x i32> %val, i32 %swap, i32 %index
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion using a pair of vector bswaps.
|
||||
define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: vlebrf %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load i32, i32 *%ptr
|
||||
%swapval = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
%insert = insertelement <4 x i32> %swapval, i32 %element, i32 0
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion into the first element.
|
||||
define <2 x i64> @f13(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: vlebrg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
%ret = insertelement <2 x i64> %val, i64 %swap, i32 0
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion into the last element.
|
||||
define <2 x i64> @f14(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: vlebrg %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%element = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
%ret = insertelement <2 x i64> %val, i64 %swap, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion with the highest in-range offset.
|
||||
define <2 x i64> @f15(<2 x i64> %val, i64 *%base) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: vlebrg %v24, 4088(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i64, i64 *%base, i32 511
|
||||
%element = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
%ret = insertelement <2 x i64> %val, i64 %swap, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion with the first out-of-range offset.
|
||||
define <2 x i64> @f16(<2 x i64> %val, i64 *%base) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlebrg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i64, i64 *%base, i32 512
|
||||
%element = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
%ret = insertelement <2 x i64> %val, i64 %swap, i32 0
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion into a variable element.
|
||||
define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK-NOT: vlebrg
|
||||
; CHECK: br %r14
|
||||
%element = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
%ret = insertelement <2 x i64> %val, i64 %swap, i32 %index
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion using a pair of vector bswaps.
|
||||
define <2 x i64> @f18(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: vlebrg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load i64, i64 *%ptr
|
||||
%swapval = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
|
||||
%insert = insertelement <2 x i64> %swapval, i64 %element, i32 0
|
||||
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
|
||||
ret <2 x i64> %ret
|
||||
}
|
|
@@ -0,0 +1,254 @@
|
|||
; Test vector extraction of byte-swapped value to memory.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test v8i16 extraction from the first element.
|
||||
define void @f1(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vstebrh %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <8 x i16> %val, i32 0
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
store i16 %swap, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v8i16 extraction from the last element.
|
||||
define void @f2(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vstebrh %v24, 0(%r2), 7
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <8 x i16> %val, i32 7
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
store i16 %swap, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v8i16 extraction of an invalid element. This must compile,
|
||||
; but we don't care what it does.
|
||||
define void @f3(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK-NOT: vstebrh %v24, 0(%r2), 8
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <8 x i16> %val, i32 8
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
store i16 %swap, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v8i16 extraction with the highest in-range offset.
|
||||
define void @f4(<8 x i16> %val, i16 *%base) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vstebrh %v24, 4094(%r2), 5
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i32 2047
|
||||
%element = extractelement <8 x i16> %val, i32 5
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
store i16 %swap, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v8i16 extraction with the first out-of-range offset.
|
||||
define void @f5(<8 x i16> %val, i16 *%base) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vstebrh %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i32 2048
|
||||
%element = extractelement <8 x i16> %val, i32 1
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
store i16 %swap, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v8i16 extraction from a variable element.
|
||||
define void @f6(<8 x i16> %val, i16 *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK-NOT: vstebrh
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <8 x i16> %val, i32 %index
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %element)
|
||||
store i16 %swap, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v8i16 extraction using a vector bswap.
|
||||
define void @f7(<8 x i16> %val, i16 *%ptr) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: vstebrh %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
|
||||
%element = extractelement <8 x i16> %swap, i32 0
|
||||
store i16 %element, i16 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction from the first element.
|
||||
define void @f8(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vstebrf %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <4 x i32> %val, i32 0
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
store i32 %swap, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction from the last element.
|
||||
define void @f9(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vstebrf %v24, 0(%r2), 3
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <4 x i32> %val, i32 3
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
store i32 %swap, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction of an invalid element. This must compile,
|
||||
; but we don't care what it does.
|
||||
define void @f10(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK-NOT: vstebrf %v24, 0(%r2), 4
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <4 x i32> %val, i32 4
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
store i32 %swap, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction with the highest in-range offset.
|
||||
define void @f11(<4 x i32> %val, i32 *%base) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: vstebrf %v24, 4092(%r2), 2
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i32, i32 *%base, i32 1023
|
||||
%element = extractelement <4 x i32> %val, i32 2
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
store i32 %swap, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction with the first out-of-range offset.
|
||||
define void @f12(<4 x i32> %val, i32 *%base) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vstebrf %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i32, i32 *%base, i32 1024
|
||||
%element = extractelement <4 x i32> %val, i32 1
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
store i32 %swap, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction from a variable element.
|
||||
define void @f13(<4 x i32> %val, i32 *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK-NOT: vstebrf
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <4 x i32> %val, i32 %index
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %element)
|
||||
store i32 %swap, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v4i32 extraction using a vector bswap.
|
||||
define void @f14(<4 x i32> %val, i32 *%ptr) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: vstebrf %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
|
||||
%element = extractelement <4 x i32> %swap, i32 0
|
||||
store i32 %element, i32 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction from the first element.
|
||||
define void @f15(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: vstebrg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x i64> %val, i32 0
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
store i64 %swap, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction from the last element.
|
||||
define void @f16(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vstebrg %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x i64> %val, i32 1
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
store i64 %swap, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction of an invalid element. This must compile,
|
||||
; but we don't care what it does.
|
||||
define void @f17(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK-NOT: vstebrg %v24, 0(%r2), 2
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x i64> %val, i32 2
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
store i64 %swap, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction with the highest in-range offset.
|
||||
define void @f18(<2 x i64> %val, i64 *%base) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: vstebrg %v24, 4088(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i64, i64 *%base, i32 511
|
||||
%element = extractelement <2 x i64> %val, i32 1
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
store i64 %swap, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction with the first out-of-range offset.
|
||||
define void @f19(<2 x i64> %val, i64 *%base) {
|
||||
; CHECK-LABEL: f19:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vstebrg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i64, i64 *%base, i32 512
|
||||
%element = extractelement <2 x i64> %val, i32 0
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
store i64 %swap, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction from a variable element.
|
||||
define void @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f20:
|
||||
; CHECK-NOT: vstebrg
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x i64> %val, i32 %index
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %element)
|
||||
store i64 %swap, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2i64 extraction using a vector bswap.
|
||||
define void @f21(<2 x i64> %val, i64 *%ptr) {
|
||||
; CHECK-LABEL: f21:
|
||||
; CHECK: vstebrg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
|
||||
%element = extractelement <2 x i64> %swap, i32 0
|
||||
store i64 %element, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
|
@@ -0,0 +1,136 @@
|
|||
; Test vector insertions of byte-swapped memory values into 0.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test VLLEBRZH.
|
||||
define <8 x i16> @f1(i16 *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vllebrzh %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %val)
|
||||
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZH using a vector bswap.
|
||||
define <8 x i16> @f2(i16 *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vllebrzh %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i16, i16 *%ptr
|
||||
%insert = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
|
||||
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZF.
|
||||
define <4 x i32> @f3(i32 *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vllebrzf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %val)
|
||||
%ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 1
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZF using a vector bswap.
|
||||
define <4 x i32> @f4(i32 *%ptr) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vllebrzf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i32, i32 *%ptr
|
||||
%insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZG.
|
||||
define <2 x i64> @f5(i64 *%ptr) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vllebrzg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %val)
|
||||
%ret = insertelement <2 x i64> zeroinitializer, i64 %swap, i32 0
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZG using a vector bswap.
|
||||
define <2 x i64> @f6(i64 *%ptr) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vllebrzg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i64, i64 *%ptr
|
||||
%insert = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
|
||||
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZE.
|
||||
define <4 x i32> @f7(i32 *%ptr) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: vllebrze %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %val)
|
||||
%ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 0
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZE using a vector bswap.
|
||||
define <4 x i32> @f8(i32 *%ptr) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vllebrze %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load i32, i32 *%ptr
|
||||
%insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZH with the highest in-range offset.
|
||||
define <8 x i16> @f9(i16 *%base) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vllebrzh %v24, 4094(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i64 2047
|
||||
%val = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %val)
|
||||
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test VLLEBRZH with the next highest offset.
|
||||
define <8 x i16> @f10(i16 *%base) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK-NOT: vllebrzh %v24, 4096(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i64 2048
|
||||
%val = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %val)
|
||||
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test that VLLEBRZH allows an index.
|
||||
define <8 x i16> @f11(i16 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
|
||||
; CHECK: vllebrzh %v24, 0([[REG]],%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i64 %index
|
||||
%val = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %val)
|
||||
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
|
@@ -0,0 +1,77 @@
|
|||
; Test insertions of byte-swapped memory values into a nonzero index of an undef.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test v8i16 insertion into an undef, with an arbitrary index.
|
||||
define <8 x i16> @f1(i16 *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vlbrreph %v24, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
%val = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %val)
|
||||
%ret = insertelement <8 x i16> undef, i16 %swap, i32 5
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 insertion into an undef, using a vector bswap.
|
||||
define <8 x i16> @f2(i16 *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vlbrreph %v24, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
%val = load i16, i16 *%ptr
|
||||
%insert = insertelement <8 x i16> undef, i16 %val, i32 5
|
||||
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion into an undef, with an arbitrary index.
|
||||
define <4 x i32> @f3(i32 *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vlbrrepf %v24, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
%val = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %val)
|
||||
%ret = insertelement <4 x i32> undef, i32 %swap, i32 2
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 insertion into an undef, using a vector bswap.
|
||||
define <4 x i32> @f4(i32 *%ptr) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vlbrrepf %v24, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
%val = load i32, i32 *%ptr
|
||||
%insert = insertelement <4 x i32> undef, i32 %val, i32 2
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion into an undef, with an arbitrary index.
|
||||
define <2 x i64> @f5(i64 *%ptr) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vlbrrepg %v24, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
%val = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %val)
|
||||
%ret = insertelement <2 x i64> undef, i64 %swap, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 insertion into an undef, using a vector bswap.
|
||||
define <2 x i64> @f6(i64 *%ptr) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vlbrrepg %v24, 0(%r2)
|
||||
; CHECK-NEXT: br %r14
|
||||
%val = load i64, i64 *%ptr
|
||||
%insert = insertelement <2 x i64> undef, i64 %val, i32 1
|
||||
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
|
@@ -0,0 +1,192 @@
|
|||
; Test replications of a byte-swapped scalar memory value.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
; Test a v8i16 replicating load with no offset.
|
||||
define <8 x i16> @f1(i16 *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vlbrreph %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
|
||||
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
|
||||
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
|
||||
<8 x i32> zeroinitializer
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test a v8i16 replicating load with the maximum in-range offset.
|
||||
define <8 x i16> @f2(i16 *%base) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vlbrreph %v24, 4094(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i64 2047
|
||||
%scalar = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
|
||||
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
|
||||
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
|
||||
<8 x i32> zeroinitializer
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test a v8i16 replicating load with the first out-of-range offset.
|
||||
define <8 x i16> @f3(i16 *%base) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlbrreph %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i16, i16 *%base, i64 2048
|
||||
%scalar = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
|
||||
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
|
||||
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
|
||||
<8 x i32> zeroinitializer
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test a v8i16 replicating load using a vector bswap.
|
||||
define <8 x i16> @f4(i16 *%ptr) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vlbrreph %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load i16, i16 *%ptr
|
||||
%val = insertelement <8 x i16> undef, i16 %scalar, i32 0
|
||||
%rep = shufflevector <8 x i16> %val, <8 x i16> undef,
|
||||
<8 x i32> zeroinitializer
|
||||
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test a v4i32 replicating load with no offset.
|
||||
define <4 x i32> @f5(i32 *%ptr) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vlbrrepf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %scalar)
|
||||
%val = insertelement <4 x i32> undef, i32 %swap, i32 0
|
||||
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
|
||||
<4 x i32> zeroinitializer
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test a v4i32 replicating load with the maximum in-range offset.
|
||||
define <4 x i32> @f6(i32 *%base) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vlbrrepf %v24, 4092(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i32, i32 *%base, i64 1023
|
||||
%scalar = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %scalar)
|
||||
%val = insertelement <4 x i32> undef, i32 %swap, i32 0
|
||||
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
|
||||
<4 x i32> zeroinitializer
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test a v4i32 replicating load with the first out-of-range offset.
|
||||
define <4 x i32> @f7(i32 *%base) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlbrrepf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i32, i32 *%base, i64 1024
|
||||
%scalar = load i32, i32 *%ptr
|
||||
%swap = call i32 @llvm.bswap.i32(i32 %scalar)
|
||||
%val = insertelement <4 x i32> undef, i32 %swap, i32 0
|
||||
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
|
||||
<4 x i32> zeroinitializer
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test a v4i32 replicating load using a vector bswap.
|
||||
define <4 x i32> @f8(i32 *%ptr) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vlbrrepf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load i32, i32 *%ptr
|
||||
%val = insertelement <4 x i32> undef, i32 %scalar, i32 0
|
||||
%rep = shufflevector <4 x i32> %val, <4 x i32> undef,
|
||||
<4 x i32> zeroinitializer
|
||||
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test a v2i64 replicating load with no offset.
|
||||
define <2 x i64> @f9(i64 *%ptr) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vlbrrepg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %scalar)
|
||||
%val = insertelement <2 x i64> undef, i64 %swap, i32 0
|
||||
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2i64 replicating load with the maximum in-range offset.
|
||||
define <2 x i64> @f10(i64 *%base) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: vlbrrepg %v24, 4088(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i64, i64 *%base, i32 511
|
||||
%scalar = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %scalar)
|
||||
%val = insertelement <2 x i64> undef, i64 %swap, i32 0
|
||||
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2i64 replicating load with the first out-of-range offset.
|
||||
define <2 x i64> @f11(i64 *%base) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlbrrepg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr i64, i64 *%base, i32 512
|
||||
%scalar = load i64, i64 *%ptr
|
||||
%swap = call i64 @llvm.bswap.i64(i64 %scalar)
|
||||
%val = insertelement <2 x i64> undef, i64 %swap, i32 0
|
||||
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2i64 replicating load using a vector bswap.
|
||||
define <2 x i64> @f12(i64 *%ptr) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: vlbrrepg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load i64, i64 *%ptr
|
||||
%val = insertelement <2 x i64> undef, i64 %scalar, i32 0
|
||||
%rep = shufflevector <2 x i64> %val, <2 x i64> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v8i16 replicating load with an index.
|
||||
define <8 x i16> @f13(i16 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
|
||||
; CHECK: vlbrreph %v24, 2046([[REG]],%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr1 = getelementptr i16, i16 *%base, i64 %index
|
||||
%ptr = getelementptr i16, i16 *%ptr1, i64 1023
|
||||
%scalar = load i16, i16 *%ptr
|
||||
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
|
||||
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
|
||||
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
|
||||
<8 x i32> zeroinitializer
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
|
@@ -0,0 +1,40 @@
|
|||
; Test conversions between integer and float elements on arch13.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
; Test conversion of f32s to signed i32s.
|
||||
define <4 x i32> @f1(<4 x float> %floats) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vcfeb %v24, %v24, 0, 5
|
||||
; CHECK: br %r14
|
||||
%dwords = fptosi <4 x float> %floats to <4 x i32>
|
||||
ret <4 x i32> %dwords
|
||||
}
|
||||
|
||||
; Test conversion of f32s to unsigned i32s.
|
||||
define <4 x i32> @f2(<4 x float> %floats) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vclfeb %v24, %v24, 0, 5
|
||||
; CHECK: br %r14
|
||||
%dwords = fptoui <4 x float> %floats to <4 x i32>
|
||||
ret <4 x i32> %dwords
|
||||
}
|
||||
|
||||
; Test conversion of signed i32s to f32s.
|
||||
define <4 x float> @f3(<4 x i32> %dwords) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vcefb %v24, %v24, 0, 0
|
||||
; CHECK: br %r14
|
||||
%floats = sitofp <4 x i32> %dwords to <4 x float>
|
||||
ret <4 x float> %floats
|
||||
}
|
||||
|
||||
; Test conversion of unsigned i32s to f32s.
|
||||
define <4 x float> @f4(<4 x i32> %dwords) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vcelfb %v24, %v24, 0, 0
|
||||
; CHECK: br %r14
|
||||
%floats = uitofp <4 x i32> %dwords to <4 x float>
|
||||
ret <4 x float> %floats
|
||||
}
|
||||
|
|
@@ -0,0 +1,138 @@
|
|||
; Test loads of element-swapped vector elements.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
|
||||
|
||||
; Test v16i8 loads.
|
||||
define <16 x i8> @f1(<16 x i8> *%ptr) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vlbrq %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <16 x i8>, <16 x i8> *%ptr
|
||||
%ret = shufflevector <16 x i8> %load, <16 x i8> undef,
|
||||
<16 x i32> <i32 15, i32 14, i32 13, i32 12,
|
||||
i32 11, i32 10, i32 9, i32 8,
|
||||
i32 7, i32 6, i32 5, i32 4,
|
||||
i32 3, i32 2, i32 1, i32 0>
|
||||
ret <16 x i8> %ret
|
||||
}
|
||||
|
||||
; Test v8i16 loads.
|
||||
define <8 x i16> @f2(<8 x i16> *%ptr) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vlerh %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <8 x i16>, <8 x i16> *%ptr
|
||||
%ret = shufflevector <8 x i16> %load, <8 x i16> undef,
|
||||
<8 x i32> <i32 7, i32 6, i32 5, i32 4,
|
||||
i32 3, i32 2, i32 1, i32 0>
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
; Test v4i32 loads.
|
||||
define <4 x i32> @f3(<4 x i32> *%ptr) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vlerf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test v2i64 loads.
|
||||
define <2 x i64> @f4(<2 x i64> *%ptr) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vlerg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <2 x i64>, <2 x i64> *%ptr
|
||||
%ret = shufflevector <2 x i64> %load, <2 x i64> undef,
|
||||
<2 x i32> <i32 1, i32 0>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v4f32 loads.
|
||||
define <4 x float> @f5(<4 x float> *%ptr) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vlerf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <4 x float>, <4 x float> *%ptr
|
||||
%ret = shufflevector <4 x float> %load, <4 x float> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 loads.
|
||||
define <2 x double> @f6(<2 x double> *%ptr) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vlerg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%load = load <2 x double>, <2 x double> *%ptr
|
||||
%ret = shufflevector <2 x double> %load, <2 x double> undef,
|
||||
<2 x i32> <i32 1, i32 0>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test the highest aligned in-range offset.
|
||||
define <4 x i32> @f7(<4 x i32> *%base) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: vlerf %v24, 4080(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test the highest unaligned in-range offset.
|
||||
define <4 x i32> @f8(i8 *%base) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vlerf %v24, 4095(%r2)
|
||||
; CHECK: br %r14
|
||||
%addr = getelementptr i8, i8 *%base, i64 4095
|
||||
%ptr = bitcast i8 *%addr to <4 x i32> *
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test the next offset up, which requires separate address logic.
|
||||
define <4 x i32> @f9(<4 x i32> *%base) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlerf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test negative offsets, which also require separate address logic.
|
||||
define <4 x i32> @f10(<4 x i32> *%base) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: aghi %r2, -16
|
||||
; CHECK: vlerf %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Check that indexes are allowed.
|
||||
define <4 x i32> @f11(i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: vlerf %v24, 0(%r3,%r2)
|
||||
; CHECK: br %r14
|
||||
%addr = getelementptr i8, i8 *%base, i64 %index
|
||||
%ptr = bitcast i8 *%addr to <4 x i32> *
|
||||
%load = load <4 x i32>, <4 x i32> *%ptr
|
||||
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
|
||||
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
|
@@ -0,0 +1,138 @@
; Test stores of element-swapped vector elements.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s

; Test v16i8 stores.
define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstbrq %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <16 x i8> %val, <16 x i8> undef,
                        <16 x i32> <i32 15, i32 14, i32 13, i32 12,
                                    i32 11, i32 10, i32 9, i32 8,
                                    i32 7, i32 6, i32 5, i32 4,
                                    i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %swap, <16 x i8> *%ptr
  ret void
}

; Test v8i16 stores.
define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vsterh %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <8 x i16> %val, <8 x i16> undef,
                        <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                   i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %swap, <8 x i16> *%ptr
  ret void
}

; Test v4i32 stores.
define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Test v2i64 stores.
define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <2 x i64> %val, <2 x i64> undef,
                        <2 x i32> <i32 1, i32 0>
  store <2 x i64> %swap, <2 x i64> *%ptr
  ret void
}

; Test v4f32 stores.
define void @f5(<4 x float> %val, <4 x float> *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <4 x float> %val, <4 x float> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %swap, <4 x float> *%ptr
  ret void
}

; Test v2f64 stores.
define void @f6(<2 x double> %val, <2 x double> *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <2 x double> %val, <2 x double> undef,
                        <2 x i32> <i32 1, i32 0>
  store <2 x double> %swap, <2 x double> *%ptr
  ret void
}

; Test the highest aligned in-range offset.
define void @f7(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: vsterf %v24, 4080(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Test the highest unaligned in-range offset.
define void @f8(<4 x i32> %val, i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vsterf %v24, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
  ret void
}

; Test the next offset up, which requires separate address logic.
define void @f9(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Test negative offsets, which also require separate address logic.
define void @f10(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}

; Check that indexes are allowed.
define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vsterf %v24, 0(%r3,%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
  ret void
}

@ -0,0 +1,154 @@
; Test vector intrinsics added with arch13.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s

declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)

declare {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8>, <16 x i8>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16>, <8 x i16>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32>, <4 x i32>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8>, <16 x i8>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16>, <8 x i16>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32>, <4 x i32>, <16 x i8>)

; VSLD with the minimum useful value.
define <16 x i8> @test_vsld_1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsld_1:
; CHECK: vsld %v24, %v24, %v26, 1
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 1)
  ret <16 x i8> %res
}

; VSLD with the maximum value.
define <16 x i8> @test_vsld_7(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsld_7:
; CHECK: vsld %v24, %v24, %v26, 7
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 7)
  ret <16 x i8> %res
}

; VSRD with the minimum useful value.
define <16 x i8> @test_vsrd_1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsrd_1:
; CHECK: vsrd %v24, %v24, %v26, 1
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 1)
  ret <16 x i8> %res
}

; VSRD with the maximum value.
define <16 x i8> @test_vsrd_7(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsrd_7:
; CHECK: vsrd %v24, %v24, %v26, 7
; CHECK: br %r14
  %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 7)
  ret <16 x i8> %res
}

; VSTRSB.
define <16 x i8> @test_vstrsb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
                              i32 *%ccptr) {
; CHECK-LABEL: test_vstrsb:
; CHECK: vstrsb %v24, %v24, %v26, %v28, 0
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8> %a, <16 x i8> %b,
                                                  <16 x i8> %c)
  %res = extractvalue {<16 x i8>, i32} %call, 0
  %cc = extractvalue {<16 x i8>, i32} %call, 1
  store i32 %cc, i32 *%ccptr
  ret <16 x i8> %res
}

; VSTRSH.
define <16 x i8> @test_vstrsh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
                              i32 *%ccptr) {
; CHECK-LABEL: test_vstrsh:
; CHECK: vstrsh %v24, %v24, %v26, %v28, 0
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16> %a, <8 x i16> %b,
                                                  <16 x i8> %c)
  %res = extractvalue {<16 x i8>, i32} %call, 0
  %cc = extractvalue {<16 x i8>, i32} %call, 1
  store i32 %cc, i32 *%ccptr
  ret <16 x i8> %res
}

; VSTRSF.
define <16 x i8> @test_vstrsf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c,
                              i32 *%ccptr) {
; CHECK-LABEL: test_vstrsf:
; CHECK: vstrsf %v24, %v24, %v26, %v28, 0
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32> %a, <4 x i32> %b,
                                                  <16 x i8> %c)
  %res = extractvalue {<16 x i8>, i32} %call, 0
  %cc = extractvalue {<16 x i8>, i32} %call, 1
  store i32 %cc, i32 *%ccptr
  ret <16 x i8> %res
}

; VSTRSZB.
define <16 x i8> @test_vstrszb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
                               i32 *%ccptr) {
; CHECK-LABEL: test_vstrszb:
; CHECK: vstrszb %v24, %v24, %v26, %v28
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8> %a, <16 x i8> %b,
                                                   <16 x i8> %c)
  %res = extractvalue {<16 x i8>, i32} %call, 0
  %cc = extractvalue {<16 x i8>, i32} %call, 1
  store i32 %cc, i32 *%ccptr
  ret <16 x i8> %res
}

; VSTRSZH.
define <16 x i8> @test_vstrszh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
                               i32 *%ccptr) {
; CHECK-LABEL: test_vstrszh:
; CHECK: vstrszh %v24, %v24, %v26, %v28
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16> %a, <8 x i16> %b,
                                                   <16 x i8> %c)
  %res = extractvalue {<16 x i8>, i32} %call, 0
  %cc = extractvalue {<16 x i8>, i32} %call, 1
  store i32 %cc, i32 *%ccptr
  ret <16 x i8> %res
}

; VSTRSZF.
define <16 x i8> @test_vstrszf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c,
                               i32 *%ccptr) {
; CHECK-LABEL: test_vstrszf:
; CHECK: vstrszf %v24, %v24, %v26, %v28
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
  %call = call {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32> %a, <4 x i32> %b,
                                                   <16 x i8> %c)
  %res = extractvalue {<16 x i8>, i32} %call, 0
  %cc = extractvalue {<16 x i8>, i32} %call, 1
  store i32 %cc, i32 *%ccptr
  ret <16 x i8> %res
}

File diff suppressed because it is too large
@ -0,0 +1,881 @@
# For arch13 only.
# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch13 < %s 2> %t
# RUN: FileCheck < %t %s

#CHECK: error: invalid register pair
#CHECK: dfltcc %r1, %r2, %r4
#CHECK: error: invalid register pair
#CHECK: dfltcc %r2, %r1, %r4

dfltcc %r1, %r2, %r4
dfltcc %r2, %r1, %r4

#CHECK: error: invalid register pair
#CHECK: kdsa %r0, %r1

kdsa %r0, %r1

#CHECK: error: invalid operand
#CHECK: ldrv %f0, -1
#CHECK: error: invalid operand
#CHECK: ldrv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: ldrv %f0, 0(%v1,%r2)

ldrv %f0, -1
ldrv %f0, 4096
ldrv %f0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: lerv %f0, -1
#CHECK: error: invalid operand
#CHECK: lerv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: lerv %f0, 0(%v1,%r2)

lerv %f0, -1
lerv %f0, 4096
lerv %f0, 0(%v1,%r2)

#CHECK: error: invalid use of indexed addressing
#CHECK: mvcrl 160(%r1,%r15),160(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl -1(%r1),160(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl 4096(%r1),160(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl 0(%r1),-1(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl 0(%r1),4096(%r15)

mvcrl 160(%r1,%r15),160(%r15)
mvcrl -1(%r1),160(%r15)
mvcrl 4096(%r1),160(%r15)
mvcrl 0(%r1),-1(%r15)
mvcrl 0(%r1),4096(%r15)

#CHECK: error: invalid operand
#CHECK: popcnt %r2, %r4, -1
#CHECK: error: invalid operand
#CHECK: popcnt %r2, %r4, 16

popcnt %r2, %r4, -1
popcnt %r2, %r4, 16

#CHECK: error: invalid operand
#CHECK: selgr %r0, %r0, %r0, -1
#CHECK: error: invalid operand
#CHECK: selgr %r0, %r0, %r0, 16

selgr %r0, %r0, %r0, -1
selgr %r0, %r0, %r0, 16

#CHECK: error: invalid operand
#CHECK: selfhr %r0, %r0, %r0, -1
#CHECK: error: invalid operand
#CHECK: selfhr %r0, %r0, %r0, 16

selfhr %r0, %r0, %r0, -1
selfhr %r0, %r0, %r0, 16

#CHECK: error: invalid operand
#CHECK: selr %r0, %r0, %r0, -1
#CHECK: error: invalid operand
#CHECK: selr %r0, %r0, %r0, 16

selr %r0, %r0, %r0, -1
selr %r0, %r0, %r0, 16

#CHECK: error: invalid register pair
#CHECK: sortl %r1, %r2
#CHECK: error: invalid register pair
#CHECK: sortl %r2, %r1

sortl %r1, %r2
sortl %r2, %r1

#CHECK: error: invalid operand
#CHECK: stdrv %f0, -1
#CHECK: error: invalid operand
#CHECK: stdrv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: stdrv %f0, 0(%v1,%r2)

stdrv %f0, -1
stdrv %f0, 4096
stdrv %f0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: sterv %f0, -1
#CHECK: error: invalid operand
#CHECK: sterv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: sterv %f0, 0(%v1,%r2)

sterv %f0, -1
sterv %f0, 4096
sterv %f0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, 16, 0

vcefb %v0, %v0, 0, -1
vcefb %v0, %v0, 0, 16
vcefb %v0, %v0, -1, 0
vcefb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, 16, 0

vcelfb %v0, %v0, 0, -1
vcelfb %v0, %v0, 0, 16
vcelfb %v0, %v0, -1, 0
vcelfb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, 16, 0

vcfeb %v0, %v0, 0, -1
vcfeb %v0, %v0, 0, 16
vcfeb %v0, %v0, -1, 0
vcfeb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 16, 0, 0

vcfpl %v0, %v0, 0, 0, -1
vcfpl %v0, %v0, 0, 0, 16
vcfpl %v0, %v0, 0, -1, 0
vcfpl %v0, %v0, 0, 16, 0
vcfpl %v0, %v0, -1, 0, 0
vcfpl %v0, %v0, 16, 0, 0

#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 16, 0, 0

vcfps %v0, %v0, 0, 0, -1
vcfps %v0, %v0, 0, 0, 16
vcfps %v0, %v0, 0, -1, 0
vcfps %v0, %v0, 0, 16, 0
vcfps %v0, %v0, -1, 0, 0
vcfps %v0, %v0, 16, 0, 0

#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, 16, 0

vclfeb %v0, %v0, 0, -1
vclfeb %v0, %v0, 0, 16
vclfeb %v0, %v0, -1, 0
vclfeb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 16, 0, 0

vclfp %v0, %v0, 0, 0, -1
vclfp %v0, %v0, 0, 0, 16
vclfp %v0, %v0, 0, -1, 0
vclfp %v0, %v0, 0, 16, 0
vclfp %v0, %v0, -1, 0, 0
vclfp %v0, %v0, 16, 0, 0

#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 16, 0, 0

vcsfp %v0, %v0, 0, 0, -1
vcsfp %v0, %v0, 0, 0, 16
vcsfp %v0, %v0, 0, -1, 0
vcsfp %v0, %v0, 0, 16, 0
vcsfp %v0, %v0, -1, 0, 0
vcsfp %v0, %v0, 16, 0, 0

#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 0, 16

vcvb %r0, %v0, 0, -1
vcvb %r0, %v0, 0, 16

#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 0, 16

vcvbg %r0, %v0, 0, -1
vcvbg %r0, %v0, 0, 16

#CHECK: error: invalid operand
#CHECK: vlbr %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlbr %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vlbr %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlbr %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlbr %v0, 0(%v1,%r2), 0

vlbr %v0, 0, -1
vlbr %v0, 0, 16
vlbr %v0, -1, 0
vlbr %v0, 4096, 0
vlbr %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vlbrf %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrf %v0, 0(%v1,%r2)

vlbrf %v0, -1
vlbrf %v0, 4096
vlbrf %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlbrg %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrg %v0, 0(%v1,%r2)

vlbrg %v0, -1
vlbrg %v0, 4096
vlbrg %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlbrh %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrh %v0, 0(%v1,%r2)

vlbrh %v0, -1
vlbrh %v0, 4096
vlbrh %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlbrq %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrq %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrq %v0, 0(%v1,%r2)

vlbrq %v0, -1
vlbrq %v0, 4096
vlbrq %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrrep %v0, 0(%v1,%r2), 0

vlbrrep %v0, 0, -1
vlbrrep %v0, 0, 16
vlbrrep %v0, -1, 0
vlbrrep %v0, 4096, 0
vlbrrep %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vlbrrepf %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrrepf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrrepf %v0, 0(%v1,%r2)

vlbrrepf %v0, -1
vlbrrepf %v0, 4096
vlbrrepf %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlbrrepg %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrrepg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrrepg %v0, 0(%v1,%r2)

vlbrrepg %v0, -1
vlbrrepg %v0, 4096
vlbrrepg %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlbrreph %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrreph %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrreph %v0, 0(%v1,%r2)

vlbrreph %v0, -1
vlbrreph %v0, 4096
vlbrreph %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlebrf %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, 0, 4
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlebrf %v0, 0(%v1,%r2), 0

vlebrf %v0, 0, -1
vlebrf %v0, 0, 4
vlebrf %v0, -1, 0
vlebrf %v0, 4096, 0
vlebrf %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vlebrg %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, 0, 2
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlebrg %v0, 0(%v1,%r2), 0

vlebrg %v0, 0, -1
vlebrg %v0, 0, 2
vlebrg %v0, -1, 0
vlebrg %v0, 4096, 0
vlebrg %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vlebrh %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, 0, 8
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlebrh %v0, 0(%v1,%r2), 0

vlebrh %v0, 0, -1
vlebrh %v0, 0, 8
vlebrh %v0, -1, 0
vlebrh %v0, 4096, 0
vlebrh %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vler %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vler %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vler %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vler %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vler %v0, 0(%v1,%r2), 0

vler %v0, 0, -1
vler %v0, 0, 16
vler %v0, -1, 0
vler %v0, 4096, 0
vler %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vlerf %v0, -1
#CHECK: error: invalid operand
#CHECK: vlerf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlerf %v0, 0(%v1,%r2)

vlerf %v0, -1
vlerf %v0, 4096
vlerf %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlerg %v0, -1
#CHECK: error: invalid operand
#CHECK: vlerg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlerg %v0, 0(%v1,%r2)

vlerg %v0, -1
vlerg %v0, 4096
vlerg %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vlerh %v0, -1
#CHECK: error: invalid operand
#CHECK: vlerh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlerh %v0, 0(%v1,%r2)

vlerh %v0, -1
vlerh %v0, 4096
vlerh %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vllebrz %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrz %v0, 0(%v1,%r2), 0

vllebrz %v0, 0, -1
vllebrz %v0, 0, 16
vllebrz %v0, -1, 0
vllebrz %v0, 4096, 0
vllebrz %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vllebrze %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrze %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrze %v0, 0(%v1,%r2)

vllebrze %v0, -1
vllebrze %v0, 4096
vllebrze %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vllebrzf %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrzf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrzf %v0, 0(%v1,%r2)

vllebrzf %v0, -1
vllebrzf %v0, 4096
vllebrzf %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vllebrzg %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrzg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrzg %v0, 0(%v1,%r2)

vllebrzg %v0, -1
vllebrzg %v0, 4096
vllebrzg %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vllebrzh %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrzh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrzh %v0, 0(%v1,%r2)

vllebrzh %v0, -1
vllebrzh %v0, 4096
vllebrzh %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vsld %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vsld %v0, %v0, %v0, 256

vsld %v0, %v0, %v0, -1
vsld %v0, %v0, %v0, 256

#CHECK: error: invalid operand
#CHECK: vsrd %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vsrd %v0, %v0, %v0, 256

vsrd %v0, %v0, %v0, -1
vsrd %v0, %v0, %v0, 256

#CHECK: error: invalid operand
#CHECK: vstbr %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstbr %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vstbr %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstbr %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstbr %v0, 0(%v1,%r2), 0

vstbr %v0, 0, -1
vstbr %v0, 0, 16
vstbr %v0, -1, 0
vstbr %v0, 4096, 0
vstbr %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vstbrf %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrf %v0, 0(%v1,%r2)

vstbrf %v0, -1
vstbrf %v0, 4096
vstbrf %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vstbrg %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrg %v0, 0(%v1,%r2)

vstbrg %v0, -1
vstbrg %v0, 4096
vstbrg %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vstbrh %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrh %v0, 0(%v1,%r2)

vstbrh %v0, -1
vstbrh %v0, 4096
vstbrh %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vstbrq %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrq %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrq %v0, 0(%v1,%r2)

vstbrq %v0, -1
vstbrq %v0, 4096
vstbrq %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vstebrf %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, 0, 4
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstebrf %v0, 0(%v1,%r2), 0

vstebrf %v0, 0, -1
vstebrf %v0, 0, 4
vstebrf %v0, -1, 0
vstebrf %v0, 4096, 0
vstebrf %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vstebrg %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, 0, 2
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstebrg %v0, 0(%v1,%r2), 0

vstebrg %v0, 0, -1
vstebrg %v0, 0, 2
vstebrg %v0, -1, 0
vstebrg %v0, 4096, 0
vstebrg %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vstebrh %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, 0, 8
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstebrh %v0, 0(%v1,%r2), 0

vstebrh %v0, 0, -1
vstebrh %v0, 0, 8
vstebrh %v0, -1, 0
vstebrh %v0, 4096, 0
vstebrh %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vster %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vster %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vster %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vster %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vster %v0, 0(%v1,%r2), 0

vster %v0, 0, -1
vster %v0, 0, 16
vster %v0, -1, 0
vster %v0, 4096, 0
vster %v0, 0(%v1,%r2), 0

#CHECK: error: invalid operand
#CHECK: vsterf %v0, -1
#CHECK: error: invalid operand
#CHECK: vsterf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vsterf %v0, 0(%v1,%r2)

vsterf %v0, -1
vsterf %v0, 4096
vsterf %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vsterg %v0, -1
#CHECK: error: invalid operand
#CHECK: vsterg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vsterg %v0, 0(%v1,%r2)

vsterg %v0, -1
vsterg %v0, 4096
vsterg %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vsterh %v0, -1
#CHECK: error: invalid operand
#CHECK: vsterh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vsterh %v0, 0(%v1,%r2)

vsterh %v0, -1
vsterh %v0, 4096
vsterh %v0, 0(%v1,%r2)

#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 16, 0
#CHECK: error: too few operands
#CHECK: vstrs %v0, %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0, 0

vstrs %v0, %v0, %v0, %v0, 0, -1
vstrs %v0, %v0, %v0, %v0, 0, 16
vstrs %v0, %v0, %v0, %v0, -1, 0
vstrs %v0, %v0, %v0, %v0, 16, 0
vstrs %v0, %v0, %v0, %v0
vstrs %v0, %v0, %v0, %v0, 0, 0, 0

#CHECK: error: invalid operand
#CHECK: vstrsb %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrsb %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrsb %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrsb %v0, %v0, %v0, %v0, 0, 0

vstrsb %v0, %v0, %v0, %v0, -1
vstrsb %v0, %v0, %v0, %v0, 16
vstrsb %v0, %v0, %v0
vstrsb %v0, %v0, %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: vstrsf %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrsf %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrsf %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrsf %v0, %v0, %v0, %v0, 0, 0

vstrsf %v0, %v0, %v0, %v0, -1
vstrsf %v0, %v0, %v0, %v0, 16
vstrsf %v0, %v0, %v0
vstrsf %v0, %v0, %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: vstrsh %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrsh %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrsh %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrsh %v0, %v0, %v0, %v0, 0, 0

vstrsh %v0, %v0, %v0, %v0, -1
vstrsh %v0, %v0, %v0, %v0, 16
vstrsh %v0, %v0, %v0
vstrsh %v0, %v0, %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: vstrszb %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrszb %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrszb %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrszb %v0, %v0, %v0, %v0, 0, 0

vstrszb %v0, %v0, %v0, %v0, -1
vstrszb %v0, %v0, %v0, %v0, 16
vstrszb %v0, %v0, %v0
vstrszb %v0, %v0, %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: vstrszf %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrszf %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrszf %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrszf %v0, %v0, %v0, %v0, 0, 0

vstrszf %v0, %v0, %v0, %v0, -1
vstrszf %v0, %v0, %v0, %v0, 16
vstrszf %v0, %v0, %v0
vstrszf %v0, %v0, %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: vstrszh %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrszh %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrszh %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrszh %v0, %v0, %v0, %v0, 0, 0

vstrszh %v0, %v0, %v0, %v0, -1
vstrszh %v0, %v0, %v0, %v0, 16
vstrszh %v0, %v0, %v0
vstrszh %v0, %v0, %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, 16, 0

wcefb %v0, %v0, 0, -1
wcefb %v0, %v0, 0, 16
wcefb %v0, %v0, -1, 0
wcefb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, 16, 0

wcelfb %v0, %v0, 0, -1
wcelfb %v0, %v0, 0, 16
wcelfb %v0, %v0, -1, 0
wcelfb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, 16, 0

wcfeb %v0, %v0, 0, -1
wcfeb %v0, %v0, 0, 16
wcfeb %v0, %v0, -1, 0
wcfeb %v0, %v0, 16, 0

#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, 16, 0

wclfeb %v0, %v0, 0, -1
wclfeb %v0, %v0, 0, 16
wclfeb %v0, %v0, -1, 0
wclfeb %v0, %v0, 16, 0

@ -34,6 +34,16 @@
agh %r0, -524289
agh %r0, 524288

#CHECK: error: instruction requires: deflate-conversion
#CHECK: dfltcc %r2, %r4, %r6

dfltcc %r2, %r4, %r6

#CHECK: error: instruction requires: message-security-assist-extension9
#CHECK: kdsa %r0, %r2

kdsa %r0, %r2

#CHECK: error: invalid register pair
#CHECK: kma %r1, %r2, %r4
#CHECK: error: invalid register pair

@ -109,6 +119,66 @@
msgc %r0, -524289
msgc %r0, 524288

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: mvcrl 0, 0

mvcrl 0, 0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ncgrk %r0, %r0, %r0

ncgrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ncrk %r0, %r0, %r0

ncrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nngrk %r0, %r0, %r0

nngrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nnrk %r0, %r0, %r0

nnrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nogrk %r0, %r0, %r0

nogrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nork %r0, %r0, %r0

nork %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nxgrk %r0, %r0, %r0

nxgrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nxrk %r0, %r0, %r0

nxrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ocgrk %r0, %r0, %r0

ocgrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ocrk %r0, %r0, %r0

ocrk %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: popcnt %r2, %r4, 1

popcnt %r2, %r4, 1

#CHECK: error: invalid register pair
#CHECK: prno %r1, %r2
#CHECK: error: invalid register pair

@ -117,6 +187,30 @@
prno %r1, %r2
prno %r2, %r1

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selgr %r0, %r0, %r0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selgre %r0, %r0, %r0

selgr %r0, %r0, %r0, 0
selgre %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selfhr %r0, %r0, %r0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selfhre %r0, %r0, %r0

selfhr %r0, %r0, %r0, 0
selfhre %r0, %r0, %r0

#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selr %r0, %r0, %r0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selre %r0, %r0, %r0

selr %r0, %r0, %r0, 0
selre %r0, %r0, %r0

#CHECK: error: invalid operand
#CHECK: sgh %r0, -524289
#CHECK: error: invalid operand

@ -125,6 +219,11 @@
sgh %r0, -524289
sgh %r0, 524288

#CHECK: error: instruction requires: enhanced-sort
#CHECK: sortl %r2, %r4

sortl %r2, %r4

#CHECK: error: invalid operand
#CHECK: stgsc %r0, -524289
#CHECK: error: invalid operand

@ -147,6 +246,41 @@
vap %v0, %v0, %v0, -1, 0
vap %v0, %v0, %v0, 256, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcefb %v0, %v0, 0, 0

vcefb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcelfb %v0, %v0, 0, 0

vcelfb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcfeb %v0, %v0, 0, 0

vcfeb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcfpl %v0, %v0, 0, 0, 0

vcfpl %v0, %v0, 0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcfps %v0, %v0, 0, 0, 0

vcfps %v0, %v0, 0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vclfeb %v0, %v0, 0, 0

vclfeb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vclfp %v0, %v0, 0, 0, 0

vclfp %v0, %v0, 0, 0, 0

#CHECK: error: invalid operand
#CHECK: vcp %v0, %v0, -1
#CHECK: error: invalid operand

@ -155,21 +289,32 @@
vcp %v0, %v0, -1
vcp %v0, %v0, 16

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcsfp %v0, %v0, 0, 0, 0

vcsfp %v0, %v0, 0, 0, 0

#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 16
#CHECK: error: instruction requires: vector-packed-decimal-enhancement
#CHECK: vcvb %r0, %v0, 0, 1

vcvb %r0, %v0, -1
vcvb %r0, %v0, 16
vcvb %r0, %v0, 0, 1

#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 16
#CHECK: error: instruction requires: vector-packed-decimal-enhancement
#CHECK: vcvbg %r0, %v0, 0, 1

vcvbg %r0, %v0, -1
vcvbg %r0, %v0, 16
vcvbg %r0, %v0, 0, 1

#CHECK: error: invalid operand
#CHECK: vcvd %r0, %v0, 0, -1

@ -408,6 +553,79 @@
vllezlf %v0, 4096
vllezlf %v0, 0(%v1,%r2)

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbr %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrh %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrq %v0, 0

vlbr %v0, 0, 0
vlbrf %v0, 0
vlbrg %v0, 0
vlbrh %v0, 0
vlbrq %v0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrrep %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrrepf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrrepg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrreph %v0, 0

vlbrrep %v0, 0, 0
vlbrrepf %v0, 0
vlbrrepg %v0, 0
vlbrreph %v0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlebrf %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlebrg %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlebrh %v0, 0, 0

vlebrf %v0, 0, 0
vlebrg %v0, 0, 0
vlebrh %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vler %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlerf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlerg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlerh %v0, 0

vler %v0, 0, 0
vlerf %v0, 0
vlerg %v0, 0
vlerh %v0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrz %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrze %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrzf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrzg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrzh %v0, 0

vllebrz %v0, 0, 0
vllebrze %v0, 0
vllebrzf %v0, 0
vllebrzg %v0, 0
vllebrzh %v0, 0

#CHECK: error: invalid operand
#CHECK: vlrl %v0, 0, -1
#CHECK: error: invalid operand

@ -551,6 +769,11 @@
vsdp %v0, %v0, %v0, -1, 0
vsdp %v0, %v0, %v0, 256, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsld %v0, %v0, %v0, 0

vsld %v0, %v0, %v0, 0

#CHECK: error: invalid operand
#CHECK: vsp %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand

@ -565,6 +788,11 @@
vsp %v0, %v0, %v0, -1, 0
vsp %v0, %v0, %v0, 256, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsrd %v0, %v0, %v0, 0

vsrd %v0, %v0, %v0, 0

#CHECK: error: invalid operand
#CHECK: vsrp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand

@ -585,6 +813,48 @@
vsrp %v0, %v0, -1, 0, 0
vsrp %v0, %v0, 256, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbr %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrh %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrq %v0, 0

vstbr %v0, 0, 0
vstbrf %v0, 0
vstbrg %v0, 0
vstbrh %v0, 0
vstbrq %v0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstebrf %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstebrg %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstebrh %v0, 0, 0

vstebrf %v0, 0, 0
vstebrg %v0, 0, 0
vstebrh %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vster %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsterf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsterg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsterh %v0, 0

vster %v0, 0, 0
vsterf %v0, 0
vsterg %v0, 0
vsterh %v0, 0

#CHECK: error: invalid operand
#CHECK: vstrl %v0, 0, -1
#CHECK: error: invalid operand

@ -613,6 +883,29 @@
vstrlr %v0, %r0, 4096
vstrlr %v0, %r0, 0(%r0)

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrs %v0, %v0, %v0, %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrsb %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrsf %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrsh %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrszb %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrszf %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrszh %v0, %v0, %v0, %v0

vstrs %v0, %v0, %v0, %v0, 0
vstrsb %v0, %v0, %v0, %v0
vstrsf %v0, %v0, %v0, %v0
vstrsh %v0, %v0, %v0, %v0
vstrszb %v0, %v0, %v0, %v0
vstrszf %v0, %v0, %v0, %v0
vstrszh %v0, %v0, %v0, %v0

#CHECK: error: invalid operand
#CHECK: vupkz %v0, 0, -1
#CHECK: error: invalid operand

@ -630,6 +923,26 @@
vupkz %v0, 4096, 0
vupkz %v0, 0(%r0), 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wcefb %v0, %v0, 0, 0

wcefb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wcelfb %v0, %v0, 0, 0

wcelfb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wcfeb %v0, %v0, 0, 0

wcfeb %v0, %v0, 0, 0

#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wclfeb %v0, %v0, 0, 0

wclfeb %v0, %v0, 0, 0

#CHECK: error: invalid operand
#CHECK: wfisb %v0, %v0, 0, -1
#CHECK: error: invalid operand

File diff suppressed because it is too large
@ -381,3 +381,21 @@ define <16 x i8> @test_vsldb(<16 x i8> %a, <16 x i8> %b, i32 %c) {
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
define <16 x i8> @test_vsld(<16 x i8> %a, <16 x i8> %b, i32 %c) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %c
; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
  %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
define <16 x i8> @test_vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %c
; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
  %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
  ret <16 x i8> %res
}