[SystemZ] Add support for new cpu architecture - arch13

This patch series adds support for the next-generation arch13
CPU architecture to the SystemZ backend.

This includes:
- Basic support for the new processor and its features.
- Assembler/disassembler support for new instructions.
- CodeGen for new instructions, including new LLVM intrinsics.
- Scheduler description for the new processor.
- Detection of arch13 as host processor.

Note: No currently available Z system supports the arch13
architecture.  Once new systems become available, the
official system name will be added as a supported -march name.

llvm-svn: 365932
This commit is contained in:
Ulrich Weigand 2019-07-12 18:13:16 +00:00
parent 223573c8ba
commit 0f0a8b7784
47 changed files with 9009 additions and 73 deletions

View File

@ -48,6 +48,9 @@ class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
: GCCBuiltin<"__builtin_s390_" ## name>,
Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
// Ternary intrinsic with distinct result/argument types that also produces
// the condition code as a second (i32) result.
class SystemZTernaryConvCC<LLVMType result, LLVMType arg>
: Intrinsic<[result, llvm_i32_ty], [arg, arg, result], [IntrNoMem]>;
// Ternary intrinsic whose result and argument types are identical.
class SystemZTernary<string name, LLVMType type>
: SystemZTernaryConv<name, type, type>;
@ -415,6 +418,24 @@ let TargetPrefix = "s390" in {
def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;
// Instructions from the Vector Enhancements Facility 2 (arch13).
// vsld/vsrd: presumably vector shift left/right double by bit (per the
// mnemonics); the third operand is the shift amount and must be a
// compile-time immediate (ImmArg<2>).
def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<2>]>;
def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<2>]>;
// vstrs/vstrsz string-search intrinsics for byte/halfword/word elements.
// Each returns a v16i8 result plus the condition code (SystemZTernaryConvCC);
// they are mapped to VSTRS_CC/VSTRSZ_CC nodes by isIntrinsicWithCC.
def int_s390_vstrsb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
def int_s390_vstrsh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
def int_s390_vstrsf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
def int_s390_vstrszb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
def int_s390_vstrszh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
}
//===----------------------------------------------------------------------===//

View File

@ -315,6 +315,8 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
if (Id >= 8561 && HaveVectorSupport)
return "arch13";
if (Id >= 3906 && HaveVectorSupport)
return "z14";
if (Id >= 2964 && HaveVectorSupport)

View File

@ -239,6 +239,51 @@ def Arch12NewFeatures : SystemZFeatureList<[
FeatureInsertReferenceBitsMultiple
]>;
//===----------------------------------------------------------------------===//
//
// New features added in the Thirteenth Edition of the z/Architecture
//
//===----------------------------------------------------------------------===//
// Each SystemZFeature takes: the -mattr/attribute string, the stem of the
// generated Subtarget predicate (has<Name>()), and a help-text description.
def FeatureMiscellaneousExtensions3 : SystemZFeature<
"miscellaneous-extensions-3", "MiscellaneousExtensions3",
"Assume that the miscellaneous-extensions facility 3 is installed"
>;
def FeatureMessageSecurityAssist9 : SystemZFeature<
"message-security-assist-extension9", "MessageSecurityAssist9",
"Assume that the message-security-assist extension facility 9 is installed"
>;
def FeatureVectorEnhancements2 : SystemZFeature<
"vector-enhancements-2", "VectorEnhancements2",
"Assume that the vector enhancements facility 2 is installed"
>;
def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
"vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
"Assume that the vector packed decimal enhancement facility is installed"
>;
def FeatureEnhancedSort : SystemZFeature<
"enhanced-sort", "EnhancedSort",
"Assume that the enhanced-sort facility is installed"
>;
def FeatureDeflateConversion : SystemZFeature<
"deflate-conversion", "DeflateConversion",
"Assume that the deflate-conversion facility is installed"
>;
// The set of facilities that arch13 adds on top of arch12; used to build
// the cumulative supported/unsupported feature lists below.
def Arch13NewFeatures : SystemZFeatureList<[
FeatureMiscellaneousExtensions3,
FeatureMessageSecurityAssist9,
FeatureVectorEnhancements2,
FeatureVectorPackedDecimalEnhancement,
FeatureEnhancedSort,
FeatureDeflateConversion
]>;
//===----------------------------------------------------------------------===//
//
// Cumulative supported and unsupported feature sets
@ -255,9 +300,13 @@ def Arch11SupportedFeatures
: SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
def Arch12SupportedFeatures
: SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>;
// Everything arch12 supports plus the new arch13 facilities.
def Arch13SupportedFeatures
: SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>;
def Arch12UnsupportedFeatures
def Arch13UnsupportedFeatures
: SystemZFeatureList<[]>;
def Arch12UnsupportedFeatures
: SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>;
def Arch11UnsupportedFeatures
: SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>;
def Arch10UnsupportedFeatures

View File

@ -1480,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
Node->getOperand(0).getOpcode() != ISD::Constant)
if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
// Don't split the operation if we can match one of the combined
// logical operations provided by miscellaneous-extensions-3.
if (Subtarget->hasMiscellaneousExtensions3()) {
unsigned ChildOpcode = Node->getOperand(0).getOpcode();
// Check whether this expression matches NAND/NOR/NXOR.
if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
ChildOpcode == ISD::XOR)
break;
// Check whether this expression matches OR-with-complement.
if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
auto Op0 = Node->getOperand(0);
if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
if (Op0Op1->getZExtValue() == (uint64_t)-1)
break;
}
}
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
splitLargeImmediate(Opcode, Node, Node->getOperand(0),
Val - uint32_t(Val), uint32_t(Val));

View File

@ -252,6 +252,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
// On arch13 we have native support for a 64-bit CTPOP.
if (Subtarget.hasMiscellaneousExtensions3()) {
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
}
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}
if (Subtarget.hasVectorEnhancements2()) {
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
}
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@ -576,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
@ -1809,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vstrsb:
case Intrinsic::s390_vstrsh:
case Intrinsic::s390_vstrsf:
Opcode = SystemZISD::VSTRS_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vstrszb:
case Intrinsic::s390_vstrszh:
case Intrinsic::s390_vstrszf:
Opcode = SystemZISD::VSTRSZ_CC;
CCValid = SystemZ::CCMASK_ANY;
return true;
case Intrinsic::s390_vfcedbs:
case Intrinsic::s390_vfcesbs:
Opcode = SystemZISD::VFCMPES;
@ -4506,9 +4538,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}
// Return true if Op can act directly as the memory source of a vector
// element load: either a plain unindexed scalar LOAD, or - when the
// vector-enhancements facility 2 is available - a byte-swapping LRV load.
bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
  unsigned Opc = Op.getOpcode();
  if (Opc == ISD::LOAD)
    return cast<LoadSDNode>(Op)->isUnindexed();
  return Opc == SystemZISD::LRV && Subtarget.hasVectorEnhancements2();
}
// Combine GPR scalar values Elems into a vector of type VT.
static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) {
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const {
// See whether there is a single replicated value.
SDValue Single;
unsigned int NumElements = Elems.size();
@ -4537,13 +4578,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}
@ -4615,8 +4656,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
if (Elems[I].getOpcode() == ISD::LOAD &&
cast<LoadSDNode>(Elems[I])->isUnindexed()) {
if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@ -5152,6 +5192,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VISTR_CC);
OPCODE(VSTRC_CC);
OPCODE(VSTRCZ_CC);
OPCODE(VSTRS_CC);
OPCODE(VSTRSZ_CC);
OPCODE(TDC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
@ -5171,6 +5213,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_CMP_SWAP_128);
OPCODE(LRV);
OPCODE(STRV);
OPCODE(VLER);
OPCODE(VSTER);
OPCODE(PREFETCH);
}
return nullptr;
@ -5484,6 +5528,31 @@ SDValue SystemZTargetLowering::combineLOAD(
return SDValue(N, 0);
}
// Return true if a byte-swapped load or store of type VT can be selected as
// a single instruction: LRVH/LRV/LRVG / STRVH/STRV/STRVG for the scalar
// types, and (with vector-enhancements facility 2) VLBR/VSTBR for the
// element-typed 128-bit vector types.
bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
  const bool IsSwappableScalar =
      VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64;
  if (IsSwappableScalar)
    return true;
  const bool IsSwappableVector =
      VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64;
  return IsSwappableVector && Subtarget.hasVectorEnhancements2();
}
// Return true if shuffle mask M, applied to a simple 128-bit vector type VT
// whose elements are a whole number of bytes wide, reverses the element
// order.  Negative (UNDEF) mask entries are permitted at any position.
static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
  if (!VT.isVector() || !VT.isSimple())
    return false;
  if (VT.getSizeInBits() != 128 || VT.getScalarSizeInBits() % 8 != 0)
    return false;

  const unsigned LastElt = VT.getVectorNumElements() - 1;
  for (unsigned Idx = 0; Idx <= LastElt; ++Idx) {
    int MaskElt = M[Idx];
    // UNDEF entries match anything; defined entries must mirror the index.
    if (MaskElt >= 0 && (unsigned)MaskElt != LastElt - Idx)
      return false;
  }
  return true;
}
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@ -5505,13 +5574,11 @@ SDValue SystemZTargetLowering::combineSTORE(
SN->getMemOperand());
}
}
// Combine STORE (BSWAP) into STRVH/STRV/STRVG
// Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
if (!SN->isTruncatingStore() &&
Op1.getOpcode() == ISD::BSWAP &&
Op1.getNode()->hasOneUse() &&
(Op1.getValueType() == MVT::i16 ||
Op1.getValueType() == MVT::i32 ||
Op1.getValueType() == MVT::i64)) {
canLoadStoreByteSwapped(Op1.getValueType())) {
SDValue BSwapOp = Op1.getOperand(0);
@ -5526,15 +5593,97 @@ SDValue SystemZTargetLowering::combineSTORE(
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
Ops, MemVT, SN->getMemOperand());
}
// Combine STORE (element-swap) into VSTER
if (!SN->isTruncatingStore() &&
Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
Op1.getNode()->hasOneUse() &&
Subtarget.hasVectorEnhancements2()) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
ArrayRef<int> ShuffleMask = SVN->getMask();
if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
SDValue Ops[] = {
N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
};
return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
DAG.getVTList(MVT::Other),
Ops, MemVT, SN->getMemOperand());
}
}
return SDValue();
}
// DAG combine for VECTOR_SHUFFLE: fold an element-reversing shuffle of a
// non-extending, single-use load into a single element-swapping load (VLER)
// when the vector-enhancements facility 2 is available.  Returns SDValue()
// when no combine applies.
SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine element-swap (LOAD) into VLER
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
Subtarget.hasVectorEnhancements2()) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
ArrayRef<int> ShuffleMask = SVN->getMask();
// Only masks that exactly reverse the element order qualify.
if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the element-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr() // Ptr
};
SDValue ESLoad =
DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
DAG.getVTList(LD->getValueType(0), MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());
// First, combine the VECTOR_SHUFFLE away. This makes the value produced
// by the load dead.
DCI.CombineTo(N, ESLoad);
// Next, combine the load away; we give it a bogus result value but a real
// chain result. The result value is dead because the shuffle is dead.
DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
}
return SDValue();
}
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
if (!Subtarget.hasVector())
return SDValue();
// Look through bitcasts that retain the number of vector elements.
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BITCAST &&
Op.getValueType().isVector() &&
Op.getOperand(0).getValueType().isVector() &&
Op.getValueType().getVectorNumElements() ==
Op.getOperand(0).getValueType().getVectorNumElements())
Op = Op.getOperand(0);
// Pull BSWAP out of a vector extraction.
if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
EVT VecVT = Op.getValueType();
EVT EltVT = VecVT.getVectorElementType();
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
Op.getOperand(0), N->getOperand(1));
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
if (EltVT != N->getValueType(0)) {
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
}
return Op;
}
// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
@ -5660,11 +5809,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
N->getValueType(0) == MVT::i64)) {
canLoadStoreByteSwapped(N->getValueType(0))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
@ -5697,6 +5845,74 @@ SDValue SystemZTargetLowering::combineBSWAP(
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
// Look through bitcasts that retain the number of vector elements.
SDValue Op = N->getOperand(0);
if (Op.getOpcode() == ISD::BITCAST &&
Op.getValueType().isVector() &&
Op.getOperand(0).getValueType().isVector() &&
Op.getValueType().getVectorNumElements() ==
Op.getOperand(0).getValueType().getVectorNumElements())
Op = Op.getOperand(0);
// Push BSWAP into a vector insertion if at least one side then simplifies.
if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
SDValue Vec = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
(canLoadStoreByteSwapped(N->getValueType(0)) &&
ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
EVT VecVT = N->getValueType(0);
EVT EltVT = N->getValueType(0).getVectorElementType();
if (VecVT != Vec.getValueType()) {
Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
DCI.AddToWorklist(Vec.getNode());
}
if (EltVT != Elt.getValueType()) {
Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
DCI.AddToWorklist(Elt.getNode());
}
Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
DCI.AddToWorklist(Vec.getNode());
Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
DCI.AddToWorklist(Elt.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
Vec, Elt, Idx);
}
}
// Push BSWAP into a vector shuffle if at least one side then simplifies.
ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
if (SV && Op.hasOneUse()) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
EVT VecVT = N->getValueType(0);
if (VecVT != Op0.getValueType()) {
Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
DCI.AddToWorklist(Op0.getNode());
}
if (VecVT != Op1.getValueType()) {
Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
DCI.AddToWorklist(Op1.getNode());
}
Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
DCI.AddToWorklist(Op0.getNode());
Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
DCI.AddToWorklist(Op1.getNode());
return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
}
}
return SDValue();
}
@ -5919,6 +6135,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);

View File

@ -281,6 +281,8 @@ enum NodeType : unsigned {
VISTR_CC,
VSTRC_CC,
VSTRCZ_CC,
VSTRS_CC,
VSTRSZ_CC,
// Test Data Class.
//
@ -340,6 +342,9 @@ enum NodeType : unsigned {
// Byte swapping load/store. Same operands as regular load/store.
LRV, STRV,
// Element swapping load/store. Same operands as regular load/store.
VLER, VSTER,
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.
@ -571,6 +576,9 @@ private:
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isVectorElementLoad(SDValue Op) const;
SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@ -590,8 +598,10 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
bool canLoadStoreByteSwapped(EVT VT) const;
SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;

View File

@ -1414,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<5> V2;
bits<4> M3;
bits<4> M4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = V2{3-0};
let Inst{31-24} = 0;
let Inst{23-20} = M3;
let Inst{19-12} = 0;
let Inst{19-16} = M4;
let Inst{15-12} = 0;
let Inst{11} = 0;
let Inst{10} = V2{4};
let Inst{9-8} = 0;
@ -2489,12 +2491,18 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes, bits<4> type = 0>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
mnemonic#"\t$V1, $XBD2",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> {
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
let M3 = type;
let mayStore = 1;
let AccessBytes = bytes;
}
// Assembler-only VRX-format store that exposes the M3 field as an explicit
// operand; it carries no ISel pattern (used for the generic mnemonic form).
class StoreVRXGeneric<string mnemonic, bits<16> opcode>
: InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", []> {
let mayStore = 1;
}
multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> {
let mayStore = 1, AccessBytes = 16 in {
def Align : InstVRX<opcode, (outs),
@ -3151,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = 0;
}
// RRF-c binary operation with an explicit 4-bit M3 operand and no ISel
// pattern (assembler/disassembler form only).
class BinaryRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3),
mnemonic#"\t$R1, $R2, $M3", []>;
class BinaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
@ -3218,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
}
// RRF-a conditional select: chooses between $R2 and $R3 according to the
// condition-code mask, matched from z_select_ccmask.  The CC mask ($M4) is
// folded into the printed mnemonic.  CCMaskLast marks the mask as the final
// operand for the CC-handling machinery.
class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2, RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1),
(ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
mnemonic#"$M4\t$R1, $R2, $R3",
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
cond4:$valid, cond4:$M4))]> {
let CCMaskLast = 1;
}
// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2, RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4),
mnemonic#"\t$R1, $R2, $R3, $M4", []>;
// Like CondBinaryRRFa, but with a fixed CC mask.
class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2),
mnemonic#V.suffix#"\t$R1, $R2, $R3", []> {
let isAsmParserOnly = V.alternate;
let M4 = V.ccmask;
}
// Emit both the CodeGen-only pattern form and the assembler form under one
// mnemonic.
multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3> {
let isCodeGenOnly = 1 in
def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
}
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@ -3612,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3),
mnemonic#"\t$R1, $V2, $M3", []>;
mnemonic#"\t$R1, $V2, $M3", []> {
let M4 = 0;
}
class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
@ -3990,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
let M4 = 0;
}
// RRF-a side-effecting operation whose first two register operands are both
// read and written (modelled as tied ins/outs, with the redundant source
// copies excluded from the encoding); M4 is hard-wired to zero.
class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
: InstRRFa<opcode, (outs cls1:$R1, cls2:$R2),
(ins cls1:$R1src, cls2:$R2src, cls3:$R3),
mnemonic#"\t$R1, $R2, $R3", []> {
let Constraints = "$R1 = $R1src, $R2 = $R2src";
let DisableEncoding = "$R1src, $R2src";
let M4 = 0;
}
class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
@ -4278,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode>
mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0>
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
mnemonic#"\t$V1, $V2, $V3, $V4",
@ -4286,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(tr2.vt tr2.op:$V3),
(tr1.vt tr1.op:$V4)))]> {
let M5 = type;
let M6 = 0;
let M6 = m6;
}
class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
@ -4296,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
let M6 = 0;
}
// Ternary operation where the assembler mnemonic has an extra operand to
// optionally allow specifying arbitrary M6 values.  The ISel pattern and an
// InstAlias both supply M6 = 0 so the plain four-operand form keeps working.
multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type> {
let M5 = type, Defs = [CC] in
def "" : InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>;
def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3),
(tr1.vt tr1.op:$V4)),
(!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr1.op:$V4, 0)>;
}
// Generic (explicit-M5) variant of TernaryExtraVRRd; the InstAlias again
// defaults M6 to zero.
multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
let Defs = [CC] in
def "" : InstVRRd<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3, VR128:$V4,
imm32zx4:$M5, imm32zx4:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
}
class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
: InstVRRe<opcode, (outs tr1.op:$V1),
@ -4326,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
// VRR-i ternary operation with explicit M3 and M4 operands and no ISel
// pattern (assembler/disassembler form).
class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2,
imm32zx4:$M3, imm32zx4:$M4),
mnemonic#"\t$R1, $V2, $M3, $M4", []>;
class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
: InstVRSb<opcode, (outs VR128:$V1),
(ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
@ -4705,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
let CCMaskLast = 1;
}
// Like CondBinaryRRFa, but expanded after RA depending on the choice of
// register.  Matches the same z_select_ccmask pattern as the real
// instruction; the pseudo is resolved by expandPostRAPseudo once register
// banks (high/low GR32 halves) are known.
class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
: Pseudo<(outs cls1:$R1),
(ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
[(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
cond4:$valid, cond4:$M4))]> {
let CCMaskLast = 1;
}
// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>

View File

@ -223,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
// correctly. This change is deferred to the SystemZExpandPseudo pass.
}
// MI is a select pseudo instruction. Replace it with LowOpcode if source
// and destination are all low GR32s and HighOpcode if source and destination
// are all high GR32s. Otherwise, use the two-operand MixedOpcode.
// Operand layout: operand 0 = dest, operands 1/2 = the two select sources,
// followed by the CC-mask operands.
void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode,
unsigned MixedOpcode) const {
unsigned DestReg = MI.getOperand(0).getReg();
unsigned Src1Reg = MI.getOperand(1).getReg();
unsigned Src2Reg = MI.getOperand(2).getReg();
bool DestIsHigh = isHighReg(DestReg);
bool Src1IsHigh = isHighReg(Src1Reg);
bool Src2IsHigh = isHighReg(Src2Reg);
// If sources and destination aren't all high or all low, we may be able to
// simplify the operation by moving one of the sources to the destination
// first. But only if this doesn't clobber the other source.
if (DestReg != Src1Reg && DestReg != Src2Reg) {
if (DestIsHigh != Src1IsHigh) {
// Copy source 1 into the destination's half, then retarget operand 1.
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg,
SystemZ::LR, 32, MI.getOperand(1).isKill(),
MI.getOperand(1).isUndef());
MI.getOperand(1).setReg(DestReg);
Src1Reg = DestReg;
Src1IsHigh = DestIsHigh;
} else if (DestIsHigh != Src2IsHigh) {
// Same for source 2.
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg,
SystemZ::LR, 32, MI.getOperand(2).isKill(),
MI.getOperand(2).isUndef());
MI.getOperand(2).setReg(DestReg);
Src2Reg = DestReg;
Src2IsHigh = DestIsHigh;
}
}
// If the destination (now) matches one source, prefer this to be first.
if (DestReg != Src1Reg && DestReg == Src2Reg) {
commuteInstruction(MI, false, 1, 2);
std::swap(Src1Reg, Src2Reg);
std::swap(Src1IsHigh, Src2IsHigh);
}
if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
MI.setDesc(get(LowOpcode));
else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
MI.setDesc(get(HighOpcode));
else {
// Given the simplification above, we must already have a two-operand case.
assert (DestReg == Src1Reg);
MI.setDesc(get(MixedOpcode));
MI.tieOperands(0, 1);
LOCRMuxJumps++;
}
// If we were unable to implement the pseudo with a single instruction, we
// need to convert it back into a branch sequence. This cannot be done here
// since the caller of expandPostRAPseudo does not handle changes to the CFG
// correctly. This change is deferred to the SystemZExpandPseudo pass.
}
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@ -312,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
};
switch (MI.getOpcode()) {
case SystemZ::SELRMux:
case SystemZ::SELFHR:
case SystemZ::SELR:
case SystemZ::SELGR:
case SystemZ::LOCRMux:
case SystemZ::LOCFHR:
case SystemZ::LOCR:
@ -606,7 +669,9 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
unsigned Opc;
if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
if (STI.hasLoadStoreOnCond2())
if (STI.hasMiscellaneousExtensions3())
Opc = SystemZ::SELRMux;
else if (STI.hasLoadStoreOnCond2())
Opc = SystemZ::LOCRMux;
else {
Opc = SystemZ::LOCR;
@ -618,9 +683,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
TrueReg = TReg;
FalseReg = FReg;
}
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
Opc = SystemZ::LOCGR;
else
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
if (STI.hasMiscellaneousExtensions3())
Opc = SystemZ::SELGR;
else
Opc = SystemZ::LOCGR;
} else
llvm_unreachable("Invalid register class");
BuildMI(MBB, I, DL, get(Opc), DstReg)
@ -643,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewUseOpc;
unsigned UseIdx;
int CommuteIdx = -1;
bool TieOps = false;
switch (UseOpc) {
case SystemZ::SELRMux:
TieOps = true;
/* fall through */
case SystemZ::LOCRMux:
if (!STI.hasLoadStoreOnCond2())
return false;
@ -655,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
else
return false;
break;
case SystemZ::SELGR:
TieOps = true;
/* fall through */
case SystemZ::LOCGR:
if (!STI.hasLoadStoreOnCond2())
return false;
@ -676,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.setDesc(get(NewUseOpc));
if (TieOps)
UseMI.tieOperands(0, 1);
UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
if (DeleteDef)
DefMI.eraseFromParent();
@ -1285,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
return true;
case SystemZ::SELRMux:
expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR,
SystemZ::LOCRMux);
return true;
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;

View File

@ -162,6 +162,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
unsigned HighOpcode) const;
void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode, unsigned MixedOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;

View File

@ -474,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
}
// Move right.
// MVCRL (move right to left) is new with the miscellaneous-extensions
// facility 3 (arch13). It implicitly reads the low 32 bits of R0 — hence
// Uses = [R0L]; presumably that carries the operand length (confirm
// against the Principles of Operation).
let Predicates = [FeatureMiscellaneousExtensions3],
mayLoad = 1, mayStore = 1, Uses = [R0L] in
def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>;
// String moves.
let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
@ -482,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
// Conditional move instructions
//===----------------------------------------------------------------------===//
// Select instructions (arch13, miscellaneous-extensions facility 3):
// unlike the LOC* load-on-condition family, these take two source
// registers plus a separate destination, so no operand needs to be tied.
let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
// Select.
let isCommutable = 1 in {
// Expands to SELR or SELFHR or a branch-and-move sequence,
// depending on the choice of registers.
def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
}
// Define AsmParser extended mnemonics for each general condition-code mask.
// These are assembler conveniences only (e.g. "selre" for "selr ...,8").
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
"Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0,
GR32, GR32, GR32>;
def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0,
GRH32, GRH32, GRH32>;
def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3,
GR64, GR64, GR64>;
}
}
let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load immediate on condition. Matched via DAG pattern and created
// by the PeepholeOptimizer via FoldImmediate.
@ -1243,6 +1271,43 @@ let Defs = [CC] in {
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
//===----------------------------------------------------------------------===//
// Combined logical operations
//===----------------------------------------------------------------------===//
// Single-instruction forms of and/or/xor combined with a complement,
// new with the miscellaneous-extensions facility 3 (arch13). They are
// matched via the andc/orc/nand/nor/nxor PatFrags and all set CC.
let Predicates = [FeatureMiscellaneousExtensions3],
Defs = [CC] in {
// AND with complement.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>;
def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>;
}
// OR with complement.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>;
def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>;
}
// NAND.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>;
def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>;
}
// NOR.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>;
def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>;
}
// NXOR.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>;
def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>;
}
}
//===----------------------------------------------------------------------===//
// Multiplication
//===----------------------------------------------------------------------===//
@ -1837,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
let Predicates = [FeatureMessageSecurityAssist8] in
def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929,
GR128, GR128, GR128>;
// KDSA (compute digital signature authentication), new with the
// message-security-assist extension facility 9 (arch13).
let Predicates = [FeatureMessageSecurityAssist9] in
def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>;
}
//===----------------------------------------------------------------------===//
@ -2017,7 +2085,12 @@ let Defs = [CC] in
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
// Population count. Counts bits set per byte or doubleword.
// With the miscellaneous-extensions facility 3, POPCNT gains an extra
// mask operand; the value 8 used in the pattern below selects counting
// over the full doubleword, so a 64-bit ctpop becomes one instruction.
let Predicates = [FeatureMiscellaneousExtensions3] in {
let Defs = [CC] in
def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>;
def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>;
}
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
@ -2048,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
// Sort lists.
// SORTL is a long-running, interruptible instruction; it reads implicit
// state in R0/R1 (function code / parameter block, per convention for
// such instructions — confirm against the Principles of Operation).
let Predicates = [FeatureEnhancedSort],
mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>;
// Deflate conversion call.
// DFLTCC performs DEFLATE (compression) conversions; like SORTL it uses
// implicit R0/R1 state and sets CC.
let Predicates = [FeatureDeflateConversion],
mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939,
GR128, GR128, GR64>;
// Execute.
let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;

View File

@ -248,6 +248,81 @@ let Predicates = [FeatureVectorPackedDecimal] in {
def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
}
//===----------------------------------------------------------------------===//
// Byte swaps
//===----------------------------------------------------------------------===//
// Vector byte-order instructions, new with the vector-enhancements
// facility 2 (arch13). Two flavors: VLBR/VSTBR reverse the bytes within
// each element ("byte-reversed elements"), VLER/VSTER reverse the order
// of the elements themselves ("elements reversed").
let Predicates = [FeatureVectorEnhancements2] in {
// Load byte-reversed elements.
def VLBR : UnaryVRXGeneric<"vlbr", 0xE606>;
def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>;
// Load elements reversed.
def VLER : UnaryVRXGeneric<"vler", 0xE607>;
def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>;
def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>;
def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>;
// FP element reversal maps onto the same-width integer forms.
def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)),
(VLERF bdxaddr12only:$addr)>;
def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)),
(VLERG bdxaddr12only:$addr)>;
// Reversing the elements of a byte vector is a full 16-byte reversal,
// which is what VLBRQ (byte-reverse the whole quadword) implements.
def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)),
(VLBRQ bdxaddr12only:$addr)>;
// Load byte-reversed element.
def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>;
def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>;
def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>;
// Load byte-reversed element and zero.
def VLLEBRZ : UnaryVRXGeneric<"vllebrz", 0xE604>;
def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>;
def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>;
def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>;
def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>;
// Assembler aliases for the scalar byte-reversed FP loads.
def : InstAlias<"lerv\t$V1, $XBD2",
(VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>;
def : InstAlias<"ldrv\t$V1, $XBD2",
(VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>;
// Load byte-reversed element and replicate.
def VLBRREP : UnaryVRXGeneric<"vlbrrep", 0xE605>;
def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>;
def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>;
def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>;
// Store byte-reversed elements.
def VSTBR : StoreVRXGeneric<"vstbr", 0xE60E>;
def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>;
def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>;
def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>;
def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>;
// Store elements reversed.
def VSTER : StoreVRXGeneric<"vster", 0xE60F>;
def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>;
def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>;
def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>;
// Same mappings as on the load side: FP uses the integer forms, and a
// byte-vector element reversal is a whole-quadword byte reversal.
def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr),
(VSTERF VR128:$val, bdxaddr12only:$addr)>;
def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr),
(VSTERG VR128:$val, bdxaddr12only:$addr)>;
def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr),
(VSTBRQ VR128:$val, bdxaddr12only:$addr)>;
// Store byte-reversed element.
def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>;
def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>;
def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>;
// Assembler aliases for the scalar byte-reversed FP stores (element 0).
def : InstAlias<"sterv\t$V1, $XBD2",
(VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
def : InstAlias<"stdrv\t$V1, $XBD2",
(VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
}
//===----------------------------------------------------------------------===//
// Selects and permutes
//===----------------------------------------------------------------------===//
@ -706,6 +781,10 @@ let Predicates = [FeatureVector] in {
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
// Shift left double by bit.
// Exposed only via the int_s390_vsld intrinsic; the shift amount is an
// immediate (ImmArg in the intrinsic definition).
let Predicates = [FeatureVectorEnhancements2] in
def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>;
// Shift right arithmetic.
def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
@ -718,6 +797,10 @@ let Predicates = [FeatureVector] in {
// Shift right logical by byte.
def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
// Shift right double by bit.
// Exposed only via the int_s390_vsrd intrinsic; the shift amount is an
// immediate (ImmArg in the intrinsic definition).
let Predicates = [FeatureVectorEnhancements2] in
def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>;
// Subtract.
def VS : BinaryVRRcGeneric<"vs", 0xE7F7>;
def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
@ -945,23 +1028,41 @@ let Predicates = [FeatureVector] in {
}
}
// Convert from fixed 64-bit.
// Convert from fixed.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
}
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
// 32-bit signed-int-to-float conversions, new with vector-enhancements 2.
// VCFPS is the architecture's generic mnemonic covering all widths; it is
// asm-parser-only here since codegen uses the element-size-specific defs.
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>;
def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
}
def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
}
// Convert from logical 64-bit.
// Convert from logical.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
}
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
// 32-bit unsigned-int-to-float conversions (vector-enhancements 2).
// VCFPL is the generic asm-only mnemonic; codegen matches via VCELFB.
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>;
def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
}
def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
}
// Convert to fixed 64-bit.
// Convert to fixed.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
@ -969,8 +1070,18 @@ let Predicates = [FeatureVector] in {
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
// 32-bit float-to-signed-int conversions (vector-enhancements 2).
// VCSFP is the generic asm-only mnemonic; codegen matches via VCFEB.
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>;
def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>;
def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>;
}
// Convert to logical 64-bit.
// Convert to logical.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
@ -978,6 +1089,16 @@ let Predicates = [FeatureVector] in {
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
// 32-bit float-to-unsigned-int conversions (vector-enhancements 2).
// VCLFP is the generic asm-only mnemonic; codegen matches via VCLFEB.
let Predicates = [FeatureVectorEnhancements2] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in
def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>;
def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>;
def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>;
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>;
}
// Divide.
let Uses = [FPC], mayRaiseFPException = 1 in {
@ -1568,6 +1689,24 @@ let Predicates = [FeatureVector] in {
z_vstrcz_cc, v128f, v128f, 2, 2>;
}
// Vector string search (vector-enhancements 2). The Z variants
// additionally treat a zero byte in the haystack as a terminator
// (matched via z_vstrsz_cc); all forms produce a CC result, carried as
// the second result of the SDT_ZVecTernaryConvCC profile.
let Predicates = [FeatureVectorEnhancements2] in {
defm VSTRS : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>;
defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B,
z_vstrs_cc, v128b, v128b, 0>;
defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B,
z_vstrs_cc, v128b, v128h, 1>;
defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B,
z_vstrs_cc, v128b, v128f, 2>;
let Defs = [CC] in {
def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B,
z_vstrsz_cc, v128b, v128b, 0, 2>;
def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B,
z_vstrsz_cc, v128b, v128h, 1, 2>;
def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B,
z_vstrsz_cc, v128b, v128f, 2, 2>;
}
}
//===----------------------------------------------------------------------===//
// Packed-decimal instructions
//===----------------------------------------------------------------------===//
@ -1579,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDecimal] in {
def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>;
let Defs = [CC] in {
// With the vector-packed-decimal enhancement facility, VCVB/VCVBG gain
// an extra operand (hence the Ternary variants, suffixed "Opt" to keep
// the base Binary defs below usable on older machines).
let Predicates = [FeatureVectorPackedDecimalEnhancement] in {
def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>;
def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>;
}
def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>;
def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>;
def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>;

View File

@ -191,6 +191,12 @@ def SDT_ZVecTernary : SDTypeProfile<1, 3,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
// Two results (a vector and an i32 CC value), three operands: two
// same-typed source vectors and a third vector of the result's type.
// Mirrors the SystemZTernaryConvCC intrinsic class; used by the
// VSTRS/VSTRSZ string-search nodes.
def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3,
[SDTCisVec<0>,
SDTCisVT<1, i32>,
SDTCisVec<2>,
SDTCisSameAs<2, 3>,
SDTCisSameAs<0, 4>]>;
def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@ -278,6 +284,10 @@ def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
// Element-reversing (as opposed to byte-reversing LRV/STRV above) vector
// load/store nodes, lowered to VLER/VSTER (vector-enhancements 2).
def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
@ -337,6 +347,10 @@ def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC",
SDT_ZVecQuaternaryIntCC>;
def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
SDT_ZVecQuaternaryIntCC>;
// Vector string search nodes (VSTRS family); the "Z" form also stops at
// a zero byte. Both carry a CC value as a second result (see
// SDT_ZVecTernaryConvCC).
def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC",
SDT_ZVecTernaryConvCC>;
def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC",
SDT_ZVecTernaryConvCC>;
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
@ -661,6 +675,18 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2),
[(z_usubo node:$src1, node:$src2),
(sub node:$src1, node:$src2)]>;
// Combined logical operations. These canonical DAG shapes are matched by
// the arch13 NC/OC/NN/NO/NX instructions (see SystemZInstrInfo.td).
// AND with complemented second operand.
def andc : PatFrag<(ops node:$src1, node:$src2),
(and node:$src1, (not node:$src2))>;
// OR with complemented second operand.
def orc : PatFrag<(ops node:$src1, node:$src2),
(or node:$src1, (not node:$src2))>;
// Complemented AND / OR / XOR.
def nand : PatFrag<(ops node:$src1, node:$src2),
(not (and node:$src1, node:$src2))>;
def nor : PatFrag<(ops node:$src1, node:$src2),
(not (or node:$src1, node:$src2))>;
def nxor : PatFrag<(ops node:$src1, node:$src2),
(not (xor node:$src1, node:$src2))>;
// Fused multiply-subtract, using the natural operand order.
def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(any_fma node:$src1, node:$src2, (fneg node:$src3))>;
@ -722,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_load<i32, load>;
def z_replicate_loadi64 : z_replicate_load<i64, load>;
def z_replicate_loadf32 : z_replicate_load<f32, load>;
def z_replicate_loadf64 : z_replicate_load<f64, load>;
// Byte-swapped replicated vector element loads (VLBRREP*): load one
// element, byte-reverse it, and splat it across the vector.
def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>;
def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>;
def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>;
// Load a scalar and insert it into a single element of a vector.
class z_vle<ValueType scalartype, SDPatternOperator load>
@ -734,6 +764,10 @@ def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
def z_vlef32 : z_vle<f32, load>;
def z_vlef64 : z_vle<f64, load>;
// Byte-swapped vector element loads (VLEBR*): load one element
// byte-reversed and insert it into an existing vector.
def z_vlebri16 : z_vle<i32, z_loadbswap16>;
def z_vlebri32 : z_vle<i32, z_loadbswap32>;
def z_vlebri64 : z_vle<i64, z_loadbswap64>;
// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.
@ -778,6 +812,18 @@ def z_vllezlf32 : PatFrag<(ops node:$addr),
(v2i64
(bitconvert (v4f32 immAllZerosV))))>;
// Byte-swapped variants (VLLEBRZ*): byte-reversed load into one element
// of an otherwise-zero vector. The i64 case needs PatFrags because the
// DAG may express it either as an insert into a zero vector or as a
// z_join_dwords with a zero half.
def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>;
def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>;
def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>;
def z_vllebrzi64 : PatFrags<(ops node:$addr),
[(z_vector_insert immAllZerosV,
(i64 (z_loadbswap64 node:$addr)),
(i32 0)),
(z_join_dwords (i64 (z_loadbswap64 node:$addr)),
(i64 0))]>;
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
: PatFrag<(ops node:$vec, node:$addr, node:$index),
@ -789,6 +835,10 @@ def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
def z_vstef32 : z_vste<f32, store>;
def z_vstef64 : z_vste<f64, store>;
// Byte-swapped vector element stores (VSTEBR*): extract one element and
// store it byte-reversed.
def z_vstebri16 : z_vste<i32, z_storebswap16>;
def z_vstebri32 : z_vste<i32, z_storebswap32>;
def z_vstebri64 : z_vste<i64, z_storebswap64>;
// Arithmetic negation on vectors.
def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;

View File

@ -35,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>;

View File

@ -96,17 +96,21 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (!DoneRegs.insert(Reg).second)
continue;
for (auto &Use : MRI->use_instructions(Reg)) {
for (auto &Use : MRI->reg_instructions(Reg)) {
// For LOCRMux, see if the other operand is already a high or low
// register, and in that case give the correpsonding hints for
// register, and in that case give the corresponding hints for
// VirtReg. LOCR instructions need both operands in either high or
// low parts.
if (Use.getOpcode() == SystemZ::LOCRMux) {
// low parts. Same handling for SELRMux.
if (Use.getOpcode() == SystemZ::LOCRMux ||
Use.getOpcode() == SystemZ::SELRMux) {
MachineOperand &TrueMO = Use.getOperand(1);
MachineOperand &FalseMO = Use.getOperand(2);
const TargetRegisterClass *RC =
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
getRC32(TrueMO, VRM, MRI));
if (Use.getOpcode() == SystemZ::SELRMux)
RC = TRI->getCommonSubClass(RC,
getRC32(Use.getOperand(0), VRM, MRI));
if (RC && RC != &SystemZ::GRX32BitRegClass) {
addHints(Order, Hints, RC, MRI);
// Return true to make these hints the only regs available to

View File

@ -59,6 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit
def MCD : SchedWrite; // Millicode
include "SystemZScheduleArch13.td"
include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
include "SystemZScheduleZEC12.td"

File diff suppressed because it is too large Load Diff

View File

@ -46,6 +46,7 @@ private:
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
bool shortenSelect(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@ -175,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
return false;
}
// MI is a three-operand select instruction. If one of the sources match
// the destination, convert to the equivalent load-on-condition.
bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
  unsigned Dest = MI.getOperand(0).getReg();
  bool DestIsSrc1 = (Dest == MI.getOperand(1).getReg());
  bool DestIsSrc2 = (Dest == MI.getOperand(2).getReg());
  if (!DestIsSrc1 && !DestIsSrc2)
    return false;
  // Load-on-condition ties the destination to the first source, so if
  // only the second source matches, commute the sources first.
  if (!DestIsSrc1)
    TII->commuteInstruction(MI, false, 1, 2);
  MI.setDesc(TII->get(Opcode));
  MI.tieOperands(0, 1);
  return true;
}
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@ -195,6 +213,18 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
break;
case SystemZ::SELR:
Changed |= shortenSelect(MI, SystemZ::LOCR);
break;
case SystemZ::SELFHR:
Changed |= shortenSelect(MI, SystemZ::LOCFHR);
break;
case SystemZ::SELGR:
Changed |= shortenSelect(MI, SystemZ::LOCGR);
break;
case SystemZ::WFADB:
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;

View File

@ -55,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false),
HasVectorEnhancements1(false), HasVectorPackedDecimal(false),
HasInsertReferenceBitsMultiple(false),
HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
HasEnhancedSort(false), HasDeflateConversion(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}

View File

@ -62,6 +62,12 @@ protected:
bool HasVectorEnhancements1;
bool HasVectorPackedDecimal;
bool HasInsertReferenceBitsMultiple;
bool HasMiscellaneousExtensions3;
bool HasMessageSecurityAssist9;
bool HasVectorEnhancements2;
bool HasVectorPackedDecimalEnhancement;
bool HasEnhancedSort;
bool HasDeflateConversion;
private:
Triple TargetTriple;
@ -209,6 +215,30 @@ public:
return HasInsertReferenceBitsMultiple;
}
// Return true if the target has the miscellaneous-extensions facility 3
// (arch13).
bool hasMiscellaneousExtensions3() const {
return HasMiscellaneousExtensions3;
}
// Return true if the target has the message-security-assist
// extension facility 9 (arch13).
bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; }
// Return true if the target has the vector-enhancements facility 2
// (arch13).
bool hasVectorEnhancements2() const { return HasVectorEnhancements2; }
// Return true if the target has the vector-packed-decimal
// enhancement facility (arch13).
bool hasVectorPackedDecimalEnhancement() const {
return HasVectorPackedDecimalEnhancement;
}
// Return true if the target has the enhanced-sort facility (arch13).
bool hasEnhancedSort() const { return HasEnhancedSort; }
// Return true if the target has the deflate-conversion facility (arch13).
bool hasDeflateConversion() const { return HasDeflateConversion; }
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;

View File

@ -466,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
// Give discount for some combined logical operations if supported.
if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
if (Opcode == Instruction::Xor) {
for (const Value *A : Args) {
if (const Instruction *I = dyn_cast<Instruction>(A))
if (I->hasOneUse() &&
(I->getOpcode() == Instruction::And ||
I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::Xor))
return 0;
}
}
else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
for (const Value *A : Args) {
if (const Instruction *I = dyn_cast<Instruction>(A))
if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
return 0;
}
}
}
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
return 1;
@ -686,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// TODO: Fix base implementation which could simplify things a bit here
// (seems to miss on differentiating on scalar/vector types).
// Only 64 bit vector conversions are natively supported.
if (DstScalarBits == 64) {
if (SrcScalarBits == 64)
// Only 64 bit vector conversions are natively supported before arch13.
if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
if (SrcScalarBits == DstScalarBits)
return NumDstVectors;
if (SrcScalarBits == 1)
@ -856,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
case Instruction::Select:
if (ValTy->isFloatingPointTy())
return 4; // No load on condition for FP - costs a conditional jump.
return 1; // Load On Condition.
return 1; // Load On Condition / Select Register.
}
}
@ -1009,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
// Store/Load reversed saves one instruction.
if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
I != nullptr) {
if (Opcode == Instruction::Load && I->hasOneUse()) {
const Instruction *LdUser = cast<Instruction>(*I->user_begin());
// In case of load -> bswap -> store, return normal cost for the load.

View File

@ -1,4 +1,7 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
; RUN: | FileCheck %s -check-prefixes=CHECK,AR13
;
; Note: The scalarized vector instructions costs are not including any
; extracts, due to the undef operands.
@ -114,7 +117,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptosi <2 x double> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptosi <2 x double> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptosi <2 x float> undef to <2 x i64>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptosi <2 x float> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptosi <2 x float> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptosi <4 x fp128> undef to <4 x i64>
@ -126,7 +130,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptosi <4 x double> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptosi <4 x double> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptosi <4 x float> undef to <4 x i64>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptosi <4 x float> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptosi <4 x float> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptosi <8 x fp128> undef to <8 x i64>
@ -138,7 +143,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptosi <8 x double> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptosi <8 x double> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptosi <8 x float> undef to <8 x i64>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptosi <8 x float> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptosi <8 x float> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptosi <16 x double> undef to <16 x i64>
@ -146,7 +152,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptosi <16 x double> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptosi <16 x double> undef to <16 x i8>
; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptosi <16 x float> undef to <16 x i64>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptosi <16 x float> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptosi <16 x float> undef to <16 x i8>
@ -233,7 +240,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptoui <2 x double> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptoui <2 x double> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptoui <2 x float> undef to <2 x i64>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptoui <2 x float> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptoui <2 x float> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptoui <4 x fp128> undef to <4 x i64>
@ -245,7 +253,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptoui <4 x double> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptoui <4 x double> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptoui <4 x float> undef to <4 x i64>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptoui <4 x float> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptoui <4 x float> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptoui <8 x fp128> undef to <8 x i64>
@ -257,7 +266,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptoui <8 x double> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptoui <8 x double> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptoui <8 x float> undef to <8 x i64>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptoui <8 x float> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptoui <8 x float> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptoui <16 x double> undef to <16 x i64>
@ -265,7 +275,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptoui <16 x double> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptoui <16 x double> undef to <16 x i8>
; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptoui <16 x float> undef to <16 x i64>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptoui <16 x float> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptoui <16 x float> undef to <16 x i8>
@ -379,7 +390,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = sitofp <2 x i64> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = sitofp <2 x i32> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = sitofp <2 x i32> undef to <2 x double>
; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = sitofp <2 x i16> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = sitofp <2 x i16> undef to <2 x double>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = sitofp <2 x i16> undef to <2 x float>
@ -391,7 +403,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = sitofp <4 x i64> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = sitofp <4 x i32> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = sitofp <4 x i32> undef to <4 x double>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = sitofp <4 x i16> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = sitofp <4 x i16> undef to <4 x double>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = sitofp <4 x i16> undef to <4 x float>
@ -403,7 +416,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = sitofp <8 x i64> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = sitofp <8 x i32> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = sitofp <8 x i32> undef to <8 x double>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = sitofp <8 x i16> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = sitofp <8 x i16> undef to <8 x double>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = sitofp <8 x i16> undef to <8 x float>
@ -413,7 +427,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = sitofp <16 x i64> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = sitofp <16 x i64> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = sitofp <16 x i32> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = sitofp <16 x i16> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = sitofp <16 x i16> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = sitofp <16 x i8> undef to <16 x double>
@ -497,7 +512,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = uitofp <2 x i64> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = uitofp <2 x i32> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = uitofp <2 x i32> undef to <2 x double>
; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = uitofp <2 x i16> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = uitofp <2 x i16> undef to <2 x double>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = uitofp <2 x i16> undef to <2 x float>
@ -509,7 +525,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = uitofp <4 x i64> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = uitofp <4 x i32> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = uitofp <4 x i32> undef to <4 x double>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = uitofp <4 x i16> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = uitofp <4 x i16> undef to <4 x double>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = uitofp <4 x i16> undef to <4 x float>
@ -521,7 +538,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = uitofp <8 x i64> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = uitofp <8 x i32> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = uitofp <8 x i32> undef to <8 x double>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = uitofp <8 x i16> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = uitofp <8 x i16> undef to <8 x double>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = uitofp <8 x i16> undef to <8 x float>
@ -531,7 +549,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = uitofp <16 x i64> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = uitofp <16 x i64> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = uitofp <16 x i32> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = uitofp <16 x i16> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = uitofp <16 x i16> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = uitofp <16 x i8> undef to <16 x double>

View File

@ -1,4 +1,7 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
; RUN: | FileCheck %s -check-prefixes=CHECK,AR13
define void @bswap_i64(i64 %arg, <2 x i64> %arg2) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64':
@ -63,6 +66,32 @@ define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) {
ret void
}
define void @bswap_v2i64_mem(<2 x i64>* %src, <2 x i64> %arg, <2 x i64>* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v2i64_mem':
; Loads that feed a bswap (and stores fed by one) are costed 0 on arch13 —
; presumably folded into a byte-swapping vector load/store; confirm against
; the SystemZ cost model. On z13 they remain ordinary cost-1 accesses.
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, <2 x i64>* %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
%Ld1 = load <2 x i64>, <2 x i64>* %src
%swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
; %swp2 swaps a register operand, so its store still needs the swapped value.
%swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
store <2 x i64> %swp2, <2 x i64>* %dst
%Ld2 = load <2 x i64>, <2 x i64>* %src
%swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
store <2 x i64> %swp3, <2 x i64>* %dst
ret void
}
define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem':
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src
@ -85,6 +114,31 @@ define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
ret void
}
define void @bswap_v4i32_mem(<4 x i32>* %src, <4 x i32> %arg, <4 x i32>* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v4i32_mem':
; Same pattern as the v2i64 case: on arch13 the load/store adjacent to the
; bswap is free (cost 0) — presumably merged into a byte-swapping memory
; access; verify against the cost model implementation.
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, <4 x i32>* %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
%Ld1 = load <4 x i32>, <4 x i32>* %src
%swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
%swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
store <4 x i32> %swp2, <4 x i32>* %dst
%Ld2 = load <4 x i32>, <4 x i32>* %src
%swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
store <4 x i32> %swp3, <4 x i32>* %dst
ret void
}
define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem':
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src
@ -107,6 +161,30 @@ define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
ret void
}
define void @bswap_v8i16_mem(<8 x i16>* %src, <8 x i16> %arg, <8 x i16>* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v8i16_mem':
; Same pattern as the wider element types: on arch13 the load/store adjacent
; to the bswap is free (cost 0) — presumably merged into a byte-swapping
; memory access; verify against the cost model implementation.
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, <8 x i16>* %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
%Ld1 = load <8 x i16>, <8 x i16>* %src
%swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
%swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
store <8 x i16> %swp2, <8 x i16>* %dst
%Ld2 = load <8 x i16>, <8 x i16>* %src
%swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
store <8 x i16> %swp3, <8 x i16>* %dst
ret void
}
declare i64 @llvm.bswap.i64(i64)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

View File

@ -0,0 +1,97 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
; RUN: | FileCheck %s -check-prefixes=CHECK,AR13
define void @fun0(i32 %a) {
; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun0':
; Each pair below is a two-instruction logic sequence on i32 (not+or, not+and,
; and+not, or+not, xor+not). On z13 both halves cost 1; on arch13 the second
; half is free (cost 0) — presumably because the pair maps to a single
; combined logic instruction (e.g. or/and-with-complement, nand, nor, nxor)
; added by the new architecture; confirm against the cost model tables.
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i32 %l0, -1
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i32 %a, %c0
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i32 %l1, -1
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i32 %a, %c1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i32 %l2, %a
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 %c2, -1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i32 %l3, %a
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i32 %c3, -1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i32 %l4, %a
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i32 %c4, -1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1
entry:
; or-with-complement: %a | ~%l0
%l0 = load i32, i32* undef
%c0 = xor i32 %l0, -1
%res0 = or i32 %a, %c0
store i32 %res0, i32* undef
; and-with-complement: %a & ~%l1
%l1 = load i32, i32* undef
%c1 = xor i32 %l1, -1
%res1 = and i32 %a, %c1
store i32 %res1, i32* undef
; nand: ~(%l2 & %a)
%l2 = load i32, i32* undef
%c2 = and i32 %l2, %a
%res2 = xor i32 %c2, -1
store i32 %res2, i32* undef
; nor: ~(%l3 | %a)
%l3 = load i32, i32* undef
%c3 = or i32 %l3, %a
%res3 = xor i32 %c3, -1
store i32 %res3, i32* undef
; not-xor: ~(%l4 ^ %a)
%l4 = load i32, i32* undef
%c4 = xor i32 %l4, %a
%res4 = xor i32 %c4, -1
store i32 %res4, i32* undef
ret void
}
define void @fun1(i64 %a) {
; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun1':
; Same five combined-logic patterns as @fun0, but on i64. The second
; instruction of each pair is again free on arch13 (cost 0) and cost 1 on z13.
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i64 %l0, -1
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i64 %a, %c0
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i64 %l1, -1
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i64 %a, %c1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i64 %l2, %a
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i64 %c2, -1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i64 %l3, %a
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 %c3, -1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i64 %l4, %a
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1
; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1
entry:
; or-with-complement: %a | ~%l0
%l0 = load i64, i64* undef
%c0 = xor i64 %l0, -1
%res0 = or i64 %a, %c0
store i64 %res0, i64* undef
; and-with-complement: %a & ~%l1
%l1 = load i64, i64* undef
%c1 = xor i64 %l1, -1
%res1 = and i64 %a, %c1
store i64 %res1, i64* undef
; nand: ~(%l2 & %a)
%l2 = load i64, i64* undef
%c2 = and i64 %l2, %a
%res2 = xor i64 %c2, -1
store i64 %res2, i64* undef
; nor: ~(%l3 | %a)
%l3 = load i64, i64* undef
%c3 = or i64 %l3, %a
%res3 = xor i64 %c3, -1
store i64 %res3, i64* undef
; not-xor: ~(%l4 ^ %a)
%l4 = load i64, i64* undef
%c4 = xor i64 %l4, %a
%res4 = xor i64 %c4, -1
store i64 %res4, i64* undef
ret void
}

View File

@ -5,6 +5,9 @@
; Run the test again to make sure it still works the same even
; in the presence of the load-store-on-condition-2 facility.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
;
; And again in the presence of the select instructions.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s
; Test LOCR.
define i32 @f1(i32 %a, i32 %b, i32 %limit) {

View File

@ -1,6 +1,11 @@
; Test LOCHI and LOCGHI.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
;
; Run the test again to make sure it still works the same even
; in the presence of the select instructions.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s
define i32 @f1(i32 %x) {
; CHECK-LABEL: f1:

View File

@ -3,31 +3,36 @@
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -no-integrated-as | FileCheck %s
;
; Run the test again to make sure it still works the same even
; in the presence of the select instructions.
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \
; RUN: -no-integrated-as | FileCheck %s
define void @f1(i32 %limit) {
; CHECK-LABEL: f1:
; Both select inputs live in high-word registers (inline-asm "h" constraint),
; so the select must be emitted as high-register conditional moves (LOCFHR).
; The extra "use" of %b below keeps %b live so it cannot simply be clobbered.
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 42
; CHECK: locfhrl [[REG2]], [[REG1]]
; CHECK: stepc [[REG2]]
; CHECK: locfhrhe [[REG1]], [[REG2]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
; FIXME: We should commute the LOCRMux to save one move.
define void @f2(i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: [[LABEL]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
@ -37,16 +42,18 @@ define void @f2(i32 %limit) {
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
call void asm sideeffect "use $0", "r"(i32 %b)
ret void
}
define void @f3(i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: stepc [[REG1]]
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
; CHECK: [[LABEL]]
; CHECK: stepc [[REG2]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
%a = call i32 asm sideeffect "stepa $0", "=r"()
@ -55,17 +62,17 @@ define void @f3(i32 %limit) {
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
call void asm sideeffect "use $0", "r"(i32 %a)
ret void
}
; FIXME: We should commute the LOCRMux to save one move.
define void @f4(i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: [[LABEL]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
@ -75,6 +82,7 @@ define void @f4(i32 %limit) {
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "r"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
@ -82,8 +90,9 @@ define void @f5(i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: [[LABEL]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
@ -102,8 +111,8 @@ define void @f6(i32 %limit) {
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: locfhrle [[REG2]], [[REG1]]
; CHECK: stepc [[REG2]]
; CHECK: locfhrh [[REG1]], [[REG2]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
@ -117,6 +126,7 @@ if.then:
return:
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
@ -126,8 +136,8 @@ define void @f7(i32 %limit) {
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: locfhrh [[REG2]], [[REG1]]
; CHECK: stepc [[REG2]]
; CHECK: locfhrle [[REG1]], [[REG2]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
@ -141,6 +151,7 @@ if.then:
return:
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}

View File

@ -0,0 +1,121 @@
; Test SELR and SELGR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s
; Test SELR.
define i32 @f1(i32 %limit, i32 %a, i32 %b) {
; CHECK-LABEL: f1:
; Unsigned compare-immediate followed by SELR with the "l" (low) condition.
; CHECK: clfi %r2, 42
; CHECK: selrl %r2, %r3, %r4
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
ret i32 %res
}
; Test SELGR.
define i64 @f2(i64 %limit, i64 %a, i64 %b) {
; CHECK-LABEL: f2:
; 64-bit variant of @f1: unsigned compare plus SELGR with the "l" condition.
; CHECK: clgfi %r2, 42
; CHECK: selgrl %r2, %r3, %r4
; CHECK: br %r14
%cond = icmp ult i64 %limit, 42
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
; Test SELR in a case that could use COMPARE AND BRANCH. We prefer using
; SELR if possible.
define i32 @f3(i32 %limit, i32 %a, i32 %b) {
; CHECK-LABEL: f3:
; Equality compare: expect a separate CHI + SELR with the "e" condition,
; rather than a COMPARE AND BRANCH sequence.
; CHECK: chi %r2, 42
; CHECK: selre %r2, %r3, %r4
; CHECK: br %r14
%cond = icmp eq i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
ret i32 %res
}
; ...and again for SELGR.
define i64 @f4(i64 %limit, i64 %a, i64 %b) {
; CHECK-LABEL: f4:
; 64-bit variant of @f3: CGHI + SELGR with the "e" condition.
; CHECK: cghi %r2, 42
; CHECK: selgre %r2, %r3, %r4
; CHECK: br %r14
%cond = icmp eq i64 %limit, 42
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
; Check that we also get SELR as a result of early if-conversion.
define i32 @f5(i32 %limit, i32 %a, i32 %b) {
; CHECK-LABEL: f5:
; The branchy diamond below should be turned into a single SELR by early
; if-conversion; the phi takes %a on the taken path, so the emitted SELR
; uses the inverted "h" condition with %b/%a operand order.
; CHECK: clfi %r2, 41
; CHECK: selrh %r2, %r4, %r3
; CHECK: br %r14
entry:
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
ret i32 %res
}
; ... and likewise for SELGR.
define i64 @f6(i64 %limit, i64 %a, i64 %b) {
; CHECK-LABEL: f6:
; 64-bit variant of @f5: early if-conversion should produce SELGR with the
; inverted "h" condition and swapped operand order.
; CHECK: clgfi %r2, 41
; CHECK: selgrh %r2, %r4, %r3
; CHECK: br %r14
entry:
%cond = icmp ult i64 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i64 [ %a, %if.then ], [ %b, %entry ]
ret i64 %res
}
; Check that inverting the condition works as well.
define i32 @f7(i32 %limit, i32 %a, i32 %b) {
; CHECK-LABEL: f7:
; Same diamond as @f5 but with the phi arms swapped (%b on the taken path),
; so the SELR keeps the "h" condition with the %a/%b operand order.
; CHECK: clfi %r2, 41
; CHECK: selrh %r2, %r3, %r4
; CHECK: br %r14
entry:
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
ret i32 %res
}
; ... and likewise for SELGR.
define i64 @f8(i64 %limit, i64 %a, i64 %b) {
; CHECK-LABEL: f8:
; 64-bit variant of @f7: inverted-condition SELGR.
; CHECK: clgfi %r2, 41
; CHECK: selgrh %r2, %r3, %r4
; CHECK: br %r14
entry:
%cond = icmp ult i64 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i64 [ %b, %if.then ], [ %a, %entry ]
ret i64 %res
}

View File

@ -0,0 +1,76 @@
; Test SELFHR.
; See comments in asm-18.ll about testing high-word operations.
; The "=h"/"h" inline-asm constraints pin the values to high-word registers,
; so the select must use the high-word form SELFHR (see CHECK lines below).
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \
; RUN: -no-integrated-as | FileCheck %s
define void @f1(i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 42
; CHECK: selfhrl [[REG3:%r[0-5]]], [[REG1]], [[REG2]]
; CHECK: stepc [[REG3]]
; CHECK: br %r14
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "use $0", "h"(i32 %a)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
; Check that we also get SELFHR as a result of early if-conversion.
define void @f2(i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG2]], [[REG1]]
; CHECK: stepc [[REG3]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "use $0", "h"(i32 %a)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
; Check that inverting the condition works as well.
define void @f3(i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG1]], [[REG2]]
; CHECK: stepc [[REG3]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "use $0", "h"(i32 %a)
call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}

View File

@ -0,0 +1,179 @@
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=arch13 -start-before=greedy %s -o - \
# RUN: | FileCheck %s
#
# Test that regalloc manages (via regalloc hints) to avoid a LOCRMux jump
# sequence expansion, and a SELR instruction is emitted.
--- |
; ModuleID = 'tc.ll'
source_filename = "tc.ll"
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
@globvar = external global i32
declare void @fun() #0
define void @fun1() #0 {
bb5:
br label %bb6
bb6: ; preds = %bb33, %bb5
%tmp = phi i1 [ %tmp34, %bb33 ], [ undef, %bb5 ]
br label %bb7
bb7: ; preds = %bb7, %bb6
%lsr.iv1 = phi [512 x i32]* [ %0, %bb7 ], [ undef, %bb6 ]
%tmp8 = phi i32 [ %tmp27, %bb7 ], [ -1000000, %bb6 ]
%tmp9 = phi i64 [ %tmp28, %bb7 ], [ 0, %bb6 ]
%lsr3 = trunc i64 %tmp9 to i32
%lsr.iv12 = bitcast [512 x i32]* %lsr.iv1 to i32*
%tmp11 = load i32, i32* %lsr.iv12
%tmp12 = icmp sgt i32 %tmp11, undef
%tmp13 = trunc i64 %tmp9 to i32
%tmp14 = select i1 %tmp12, i32 %lsr3, i32 0
%tmp15 = select i1 %tmp12, i32 %tmp13, i32 %tmp8
%tmp16 = load i32, i32* undef
%tmp17 = select i1 false, i32 undef, i32 %tmp14
%tmp18 = select i1 false, i32 undef, i32 %tmp15
%tmp19 = select i1 false, i32 %tmp16, i32 undef
%tmp20 = select i1 undef, i32 undef, i32 %tmp17
%tmp21 = select i1 undef, i32 undef, i32 %tmp18
%tmp22 = select i1 undef, i32 undef, i32 %tmp19
%tmp23 = or i64 %tmp9, 3
%tmp24 = icmp sgt i32 undef, %tmp22
%tmp25 = trunc i64 %tmp23 to i32
%tmp26 = select i1 %tmp24, i32 %tmp25, i32 %tmp20
%tmp27 = select i1 %tmp24, i32 %tmp25, i32 %tmp21
%tmp28 = add nuw nsw i64 %tmp9, 4
%tmp29 = icmp eq i64 undef, 0
%scevgep = getelementptr [512 x i32], [512 x i32]* %lsr.iv1, i64 0, i64 4
%0 = bitcast i32* %scevgep to [512 x i32]*
br i1 %tmp29, label %bb30, label %bb7
bb30: ; preds = %bb7
%tmp32 = icmp sgt i32 %tmp27, -1000000
br i1 %tmp32, label %bb33, label %bb35
bb33: ; preds = %bb30
call void @fun()
store i32 %tmp26, i32* @globvar
%tmp34 = icmp ugt i32 undef, 1
br label %bb6
bb35: ; preds = %bb30
br i1 %tmp, label %bb37, label %bb38
bb37: ; preds = %bb35
unreachable
bb38: ; preds = %bb35
unreachable
}
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { "target-cpu"="arch13" }
attributes #1 = { nounwind }
...
# CHECK: selr
# CHECK-NOT: risblg
---
name: fun1
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: grx32bit }
- { id: 1, class: addr64bit }
- { id: 2, class: grx32bit }
- { id: 3, class: addr64bit }
- { id: 4, class: gr32bit }
- { id: 5, class: grx32bit }
- { id: 6, class: gr64bit }
- { id: 7, class: gr64bit }
- { id: 8, class: grx32bit }
- { id: 9, class: grx32bit }
- { id: 10, class: gr64bit }
- { id: 11, class: grx32bit }
- { id: 12, class: gr64bit }
- { id: 13, class: grx32bit }
- { id: 14, class: gr32bit }
- { id: 15, class: gr32bit }
- { id: 16, class: grx32bit }
- { id: 17, class: grx32bit }
- { id: 18, class: gr32bit }
- { id: 19, class: addr64bit }
- { id: 20, class: grx32bit }
- { id: 21, class: gr32bit }
- { id: 22, class: gr64bit }
- { id: 23, class: grx32bit }
- { id: 24, class: grx32bit }
- { id: 25, class: grx32bit }
- { id: 26, class: addr64bit }
- { id: 27, class: grx32bit }
- { id: 28, class: addr64bit }
frameInfo:
hasCalls: true
body: |
bb.0.bb5:
%25:grx32bit = IMPLICIT_DEF
bb.1.bb6:
%28:addr64bit = LGHI 0
%27:grx32bit = IIFMux 4293967296
%26:addr64bit = IMPLICIT_DEF
bb.2.bb7:
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
%14:gr32bit = LMux %26, 0, $noreg :: (load 4 from %ir.lsr.iv12)
CR %14, undef %15:gr32bit, implicit-def $cc
%16:grx32bit = COPY %28.subreg_l32
%16:grx32bit = LOCHIMux %16, 0, 14, 12, implicit $cc
%17:grx32bit = SELRMux %27, %28.subreg_l32, 14, 2, implicit killed $cc
%18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`)
%20:grx32bit = COPY %28.subreg_l32
%20:grx32bit = OILMux %20, 3, implicit-def dead $cc
CR undef %21:gr32bit, %18, implicit-def $cc
%4:gr32bit = SELRMux %16, %20, 14, 2, implicit $cc
%27:grx32bit = SELRMux %17, %20, 14, 2, implicit killed $cc
%28:addr64bit = nuw nsw LA %28, 4, $noreg
%26:addr64bit = LA %26, 16, $noreg
CGHI undef %22:gr64bit, 0, implicit-def $cc
BRC 14, 6, %bb.2, implicit killed $cc
J %bb.3
bb.3.bb30:
successors: %bb.4(0x7fffffff), %bb.5(0x00000001)
CFIMux %27, -999999, implicit-def $cc
BRC 14, 4, %bb.5, implicit killed $cc
J %bb.4
bb.4.bb33:
ADJCALLSTACKDOWN 0, 0
CallBRASL @fun, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc
ADJCALLSTACKUP 0, 0
STRL %4, @globvar :: (store 4 into @globvar)
CLFIMux undef %23:grx32bit, 1, implicit-def $cc
%25:grx32bit = LHIMux 0
%25:grx32bit = LOCHIMux %25, 1, 14, 2, implicit killed $cc
J %bb.1
bb.5.bb35:
successors: %bb.6, %bb.7
TMLMux %25, 1, implicit-def $cc
BRC 15, 8, %bb.7, implicit killed $cc
J %bb.6
bb.6.bb37:
successors:
bb.7.bb38:
...

View File

@ -0,0 +1,74 @@
; Test population-count instruction on arch13
; NOTE(review): every POPCNT here carries the trailing ", 8" operand;
; presumably the full-register (64-bit) count form -- confirm against the
; ISA description.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare i32 @llvm.ctpop.i32(i32 %a)
declare i64 @llvm.ctpop.i64(i64 %a)
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
; CHECK: llgfr %r0, %r2
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
%popcnt = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %popcnt
}
; 16-bit mask selects the LLGHR zero-extension before the count.
define i32 @f2(i32 %a) {
; CHECK-LABEL: f2:
; CHECK: llghr %r0, %r2
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
%and = and i32 %a, 65535
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
ret i32 %popcnt
}
; 8-bit mask selects the LLGCR zero-extension before the count.
define i32 @f3(i32 %a) {
; CHECK-LABEL: f3:
; CHECK: llgcr %r0, %r2
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
%and = and i32 %a, 255
%popcnt = call i32 @llvm.ctpop.i32(i32 %and)
ret i32 %popcnt
}
; Full 64-bit input needs no extension at all.
define i64 @f4(i64 %a) {
; CHECK-LABEL: f4:
; CHECK: popcnt %r2, %r2, 8
; CHECK: br %r14
%popcnt = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %popcnt
}
; 32-bit mask on an i64 input selects the LLGFR zero-extension before the
; count.  Fix: this was the only test in the file missing the final
; "; CHECK: br %r14" line (f1-f4, f6 and f7 all have it); without it,
; unintended extra instructions emitted after the POPCNT would not be caught.
define i64 @f5(i64 %a) {
; CHECK-LABEL: f5:
; CHECK: llgfr %r0, %r2
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
%and = and i64 %a, 4294967295
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
ret i64 %popcnt
}
; 16-bit mask selects the LLGHR zero-extension before the count.
define i64 @f6(i64 %a) {
; CHECK-LABEL: f6:
; CHECK: llghr %r0, %r2
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
%and = and i64 %a, 65535
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
ret i64 %popcnt
}
; 8-bit mask selects the LLGCR zero-extension before the count.
define i64 @f7(i64 %a) {
; CHECK-LABEL: f7:
; CHECK: llgcr %r0, %r2
; CHECK: popcnt %r2, %r0, 8
; CHECK: br %r14
%and = and i64 %a, 255
%popcnt = call i64 @llvm.ctpop.i64(i64 %and)
ret i64 %popcnt
}

View File

@ -0,0 +1,126 @@
; Combined logical operations involving complement on arch13
;
; RUN: llc -mcpu=arch13 < %s -mtriple=s390x-linux-gnu | FileCheck %s
; In the and/or-with-complement tests (f1-f4), the complemented value is
; deliberately the FIRST operand of the combining and/or, so matching must
; look through the operand order.
; And-with-complement 32-bit.
define i32 @f1(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: f1:
; CHECK: ncrk %r2, %r3, %r4
; CHECK: br %r14
%neg = xor i32 %b, -1
%ret = and i32 %neg, %a
ret i32 %ret
}
; And-with-complement 64-bit.
define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f2:
; CHECK: ncgrk %r2, %r3, %r4
; CHECK: br %r14
%neg = xor i64 %b, -1
%ret = and i64 %neg, %a
ret i64 %ret
}
; Or-with-complement 32-bit.
define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: f3:
; CHECK: ocrk %r2, %r3, %r4
; CHECK: br %r14
%neg = xor i32 %b, -1
%ret = or i32 %neg, %a
ret i32 %ret
}
; Or-with-complement 64-bit.
define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f4:
; CHECK: ocgrk %r2, %r3, %r4
; CHECK: br %r14
%neg = xor i64 %b, -1
%ret = or i64 %neg, %a
ret i64 %ret
}
; NAND 32-bit.
define i32 @f5(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: f5:
; CHECK: nnrk %r2, %r3, %r4
; CHECK: br %r14
%tmp = and i32 %a, %b
%ret = xor i32 %tmp, -1
ret i32 %ret
}
; NAND 64-bit.
define i64 @f6(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f6:
; CHECK: nngrk %r2, %r3, %r4
; CHECK: br %r14
%tmp = and i64 %a, %b
%ret = xor i64 %tmp, -1
ret i64 %ret
}
; NOR 32-bit.
define i32 @f7(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: f7:
; CHECK: nork %r2, %r3, %r4
; CHECK: br %r14
%tmp = or i32 %a, %b
%ret = xor i32 %tmp, -1
ret i32 %ret
}
; NOR 64-bit.
define i64 @f8(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f8:
; CHECK: nogrk %r2, %r3, %r4
; CHECK: br %r14
%tmp = or i64 %a, %b
%ret = xor i64 %tmp, -1
ret i64 %ret
}
; NXOR 32-bit.
define i32 @f9(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: f9:
; CHECK: nxrk %r2, %r3, %r4
; CHECK: br %r14
%tmp = xor i32 %a, %b
%ret = xor i32 %tmp, -1
ret i32 %ret
}
; NXOR 64-bit.
define i64 @f10(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f10:
; CHECK: nxgrk %r2, %r3, %r4
; CHECK: br %r14
%tmp = xor i64 %a, %b
%ret = xor i64 %tmp, -1
ret i64 %ret
}
; Or-with-complement 32-bit of a constant.
define i32 @f11(i32 %a) {
; CHECK-LABEL: f11:
; CHECK: lhi [[REG:%r[0-5]]], -256
; CHECK: ocrk %r2, [[REG]], %r2
; CHECK: br %r14
%neg = xor i32 %a, -1
%ret = or i32 %neg, -256
ret i32 %ret
}
; Or-with-complement 64-bit of a constant.
define i64 @f12(i64 %a) {
; CHECK-LABEL: f12:
; CHECK: lghi [[REG:%r[0-5]]], -256
; CHECK: ocgrk %r2, [[REG]], %r2
; CHECK: br %r14
%neg = xor i64 %a, -1
%ret = or i64 %neg, -256
ret i64 %ret
}

View File

@ -0,0 +1,97 @@
; Test loads of byte-swapped vector elements.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test v8i16 loads.
define <8 x i16> @f1(<8 x i16> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrh %v24, 0(%r2)
; CHECK: br %r14
%load = load <8 x i16>, <8 x i16> *%ptr
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %load)
ret <8 x i16> %ret
}
; Test v4i32 loads.
define <4 x i32> @f2(<4 x i32> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vlbrf %v24, 0(%r2)
; CHECK: br %r14
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
ret <4 x i32> %ret
}
; Test v2i64 loads.
define <2 x i64> @f3(<2 x i64> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vlbrg %v24, 0(%r2)
; CHECK: br %r14
%load = load <2 x i64>, <2 x i64> *%ptr
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %load)
ret <2 x i64> %ret
}
; Test the highest aligned in-range offset.
define <4 x i32> @f4(<4 x i32> *%base) {
; CHECK-LABEL: f4:
; CHECK: vlbrf %v24, 4080(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
ret <4 x i32> %ret
}
; Test the highest unaligned in-range offset.
define <4 x i32> @f5(i8 *%base) {
; CHECK-LABEL: f5:
; CHECK: vlbrf %v24, 4095(%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 4095
%ptr = bitcast i8 *%addr to <4 x i32> *
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
ret <4 x i32> %ret
}
; Test the next offset up, which requires separate address logic.
define <4 x i32> @f6(<4 x i32> *%base) {
; CHECK-LABEL: f6:
; CHECK: aghi %r2, 4096
; CHECK: vlbrf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
ret <4 x i32> %ret
}
; Test negative offsets, which also require separate address logic.
define <4 x i32> @f7(<4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: aghi %r2, -16
; CHECK: vlbrf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
ret <4 x i32> %ret
}
; Check that indexes are allowed.
define <4 x i32> @f8(i8 *%base, i64 %index) {
; CHECK-LABEL: f8:
; CHECK: vlbrf %v24, 0(%r3,%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 %index
%ptr = bitcast i8 *%addr to <4 x i32> *
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
ret <4 x i32> %ret
}

View File

@ -0,0 +1,97 @@
; Test stores of byte-swapped vector elements.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test v8i16 stores.
define void @f1(<8 x i16> %val, <8 x i16> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstbrh %v24, 0(%r2)
; CHECK: br %r14
%swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
store <8 x i16> %swap, <8 x i16> *%ptr
ret void
}
; Test v4i32 stores.
define void @f2(<4 x i32> %val, <4 x i32> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vstbrf %v24, 0(%r2)
; CHECK: br %r14
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Test v2i64 stores.
define void @f3(<2 x i64> %val, <2 x i64> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vstbrg %v24, 0(%r2)
; CHECK: br %r14
%swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
store <2 x i64> %swap, <2 x i64> *%ptr
ret void
}
; Test the highest aligned in-range offset.
define void @f4(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f4:
; CHECK: vstbrf %v24, 4080(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Test the highest unaligned in-range offset.
define void @f5(<4 x i32> %val, i8 *%base) {
; CHECK-LABEL: f5:
; CHECK: vstbrf %v24, 4095(%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 4095
%ptr = bitcast i8 *%addr to <4 x i32> *
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
store <4 x i32> %swap, <4 x i32> *%ptr, align 1
ret void
}
; Test the next offset up, which requires separate address logic,
define void @f6(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f6:
; CHECK: aghi %r2, 4096
; CHECK: vstbrf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Test negative offsets, which also require separate address logic,
define void @f7(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: aghi %r2, -16
; CHECK: vstbrf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Check that indexes are allowed.
define void @f8(<4 x i32> %val, i8 *%base, i64 %index) {
; CHECK-LABEL: f8:
; CHECK: vstbrf %v24, 0(%r3,%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 %index
%ptr = bitcast i8 *%addr to <4 x i32> *
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
store <4 x i32> %swap, <4 x i32> *%ptr, align 1
ret void
}

View File

@ -0,0 +1,220 @@
; Test vector insertion of byte-swapped memory values.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test v8i16 insertion into the first element.
define <8 x i16> @f1(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlebrh %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %element)
%ret = insertelement <8 x i16> %val, i16 %swap, i32 0
ret <8 x i16> %ret
}
; Test v8i16 insertion into the last element.
define <8 x i16> @f2(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vlebrh %v24, 0(%r2), 7
; CHECK: br %r14
%element = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %element)
%ret = insertelement <8 x i16> %val, i16 %swap, i32 7
ret <8 x i16> %ret
}
; Test v8i16 insertion with the highest in-range offset.
define <8 x i16> @f3(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f3:
; CHECK: vlebrh %v24, 4094(%r2), 5
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i32 2047
%element = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %element)
%ret = insertelement <8 x i16> %val, i16 %swap, i32 5
ret <8 x i16> %ret
}
; Test v8i16 insertion with the first ouf-of-range offset.
define <8 x i16> @f4(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: vlebrh %v24, 0(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i32 2048
%element = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %element)
%ret = insertelement <8 x i16> %val, i16 %swap, i32 1
ret <8 x i16> %ret
}
; Test v8i16 insertion into a variable element.
define <8 x i16> @f5(<8 x i16> %val, i16 *%ptr, i32 %index) {
; CHECK-LABEL: f5:
; CHECK-NOT: vlebrh
; CHECK: br %r14
%element = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %element)
%ret = insertelement <8 x i16> %val, i16 %swap, i32 %index
ret <8 x i16> %ret
}
; Test v8i16 insertion using a pair of vector bswaps.
define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vlebrh %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i16, i16 *%ptr
%swapval = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
%insert = insertelement <8 x i16> %swapval, i16 %element, i32 0
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
ret <8 x i16> %ret
}
; Test v4i32 insertion into the first element.
define <4 x i32> @f7(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vlebrf %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %element)
%ret = insertelement <4 x i32> %val, i32 %swap, i32 0
ret <4 x i32> %ret
}
; Test v4i32 insertion into the last element.
define <4 x i32> @f8(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vlebrf %v24, 0(%r2), 3
; CHECK: br %r14
%element = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %element)
%ret = insertelement <4 x i32> %val, i32 %swap, i32 3
ret <4 x i32> %ret
}
; Test v4i32 insertion with the highest in-range offset.
define <4 x i32> @f9(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f9:
; CHECK: vlebrf %v24, 4092(%r2), 2
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i32 1023
%element = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %element)
%ret = insertelement <4 x i32> %val, i32 %swap, i32 2
ret <4 x i32> %ret
}
; Test v4i32 insertion with the first ouf-of-range offset.
define <4 x i32> @f10(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, 4096
; CHECK: vlebrf %v24, 0(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i32 1024
%element = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %element)
%ret = insertelement <4 x i32> %val, i32 %swap, i32 1
ret <4 x i32> %ret
}
; Test v4i32 insertion into a variable element.
define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr, i32 %index) {
; CHECK-LABEL: f11:
; CHECK-NOT: vlebrf
; CHECK: br %r14
%element = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %element)
%ret = insertelement <4 x i32> %val, i32 %swap, i32 %index
ret <4 x i32> %ret
}
; Test v4i32 insertion using a pair of vector bswaps.
define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: vlebrf %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i32, i32 *%ptr
%swapval = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
%insert = insertelement <4 x i32> %swapval, i32 %element, i32 0
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
ret <4 x i32> %ret
}
; Test v2i64 insertion into the first element.
define <2 x i64> @f13(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f13:
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %element)
%ret = insertelement <2 x i64> %val, i64 %swap, i32 0
ret <2 x i64> %ret
}
; Test v2i64 insertion into the last element.
define <2 x i64> @f14(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f14:
; CHECK: vlebrg %v24, 0(%r2), 1
; CHECK: br %r14
%element = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %element)
%ret = insertelement <2 x i64> %val, i64 %swap, i32 1
ret <2 x i64> %ret
}
; Test v2i64 insertion with the highest in-range offset.
define <2 x i64> @f15(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f15:
; CHECK: vlebrg %v24, 4088(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 511
%element = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %element)
%ret = insertelement <2 x i64> %val, i64 %swap, i32 1
ret <2 x i64> %ret
}
; Test v2i64 insertion with the first ouf-of-range offset.
define <2 x i64> @f16(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f16:
; CHECK: aghi %r2, 4096
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 512
%element = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %element)
%ret = insertelement <2 x i64> %val, i64 %swap, i32 0
ret <2 x i64> %ret
}
; Test v2i64 insertion into a variable element.
define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr, i32 %index) {
; CHECK-LABEL: f17:
; CHECK-NOT: vlebrg
; CHECK: br %r14
%element = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %element)
%ret = insertelement <2 x i64> %val, i64 %swap, i32 %index
ret <2 x i64> %ret
}
; Test v2i64 insertion using a pair of vector bswaps.
define <2 x i64> @f18(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f18:
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
%element = load i64, i64 *%ptr
%swapval = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
%insert = insertelement <2 x i64> %swapval, i64 %element, i32 0
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
ret <2 x i64> %ret
}

View File

@ -0,0 +1,254 @@
; Test vector extraction of byte-swapped value to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test v8i16 extraction from the first element.
define void @f1(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstebrh %v24, 0(%r2), 0
; CHECK: br %r14
%element = extractelement <8 x i16> %val, i32 0
%swap = call i16 @llvm.bswap.i16(i16 %element)
store i16 %swap, i16 *%ptr
ret void
}
; Test v8i16 extraction from the last element.
define void @f2(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vstebrh %v24, 0(%r2), 7
; CHECK: br %r14
%element = extractelement <8 x i16> %val, i32 7
%swap = call i16 @llvm.bswap.i16(i16 %element)
store i16 %swap, i16 *%ptr
ret void
}
; Test v8i16 extraction of an invalid element. This must compile,
; but we don't care what it does.
define void @f3(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: vstebrh %v24, 0(%r2), 8
; CHECK: br %r14
%element = extractelement <8 x i16> %val, i32 8
%swap = call i16 @llvm.bswap.i16(i16 %element)
store i16 %swap, i16 *%ptr
ret void
}
; Test v8i16 extraction with the highest in-range offset.
define void @f4(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f4:
; CHECK: vstebrh %v24, 4094(%r2), 5
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i32 2047
%element = extractelement <8 x i16> %val, i32 5
%swap = call i16 @llvm.bswap.i16(i16 %element)
store i16 %swap, i16 *%ptr
ret void
}
; Test v8i16 extraction with the first ouf-of-range offset.
define void @f5(<8 x i16> %val, i16 *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, 4096
; CHECK: vstebrh %v24, 0(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i32 2048
%element = extractelement <8 x i16> %val, i32 1
%swap = call i16 @llvm.bswap.i16(i16 %element)
store i16 %swap, i16 *%ptr
ret void
}
; Test v8i16 extraction from a variable element.
define void @f6(<8 x i16> %val, i16 *%ptr, i32 %index) {
; CHECK-LABEL: f6:
; CHECK-NOT: vstebrh
; CHECK: br %r14
%element = extractelement <8 x i16> %val, i32 %index
%swap = call i16 @llvm.bswap.i16(i16 %element)
store i16 %swap, i16 *%ptr
ret void
}
; Test v8i16 extraction using a vector bswap.
define void @f7(<8 x i16> %val, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vstebrh %v24, 0(%r2), 0
; CHECK: br %r14
%swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
%element = extractelement <8 x i16> %swap, i32 0
store i16 %element, i16 *%ptr
ret void
}
; Test v4i32 extraction from the first element.
define void @f8(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vstebrf %v24, 0(%r2), 0
; CHECK: br %r14
%element = extractelement <4 x i32> %val, i32 0
%swap = call i32 @llvm.bswap.i32(i32 %element)
store i32 %swap, i32 *%ptr
ret void
}
; Test v4i32 extraction from the last element.
define void @f9(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: vstebrf %v24, 0(%r2), 3
; CHECK: br %r14
%element = extractelement <4 x i32> %val, i32 3
%swap = call i32 @llvm.bswap.i32(i32 %element)
store i32 %swap, i32 *%ptr
ret void
}
; Test v4i32 extraction of an invalid element. This must compile,
; but we don't care what it does.
define void @f10(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: vstebrf %v24, 0(%r2), 4
; CHECK: br %r14
%element = extractelement <4 x i32> %val, i32 4
%swap = call i32 @llvm.bswap.i32(i32 %element)
store i32 %swap, i32 *%ptr
ret void
}
; Test v4i32 extraction with the highest in-range offset.
define void @f11(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f11:
; CHECK: vstebrf %v24, 4092(%r2), 2
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i32 1023
%element = extractelement <4 x i32> %val, i32 2
%swap = call i32 @llvm.bswap.i32(i32 %element)
store i32 %swap, i32 *%ptr
ret void
}
; Test v4i32 extraction with the first ouf-of-range offset.
define void @f12(<4 x i32> %val, i32 *%base) {
; CHECK-LABEL: f12:
; CHECK: aghi %r2, 4096
; CHECK: vstebrf %v24, 0(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i32 1024
%element = extractelement <4 x i32> %val, i32 1
%swap = call i32 @llvm.bswap.i32(i32 %element)
store i32 %swap, i32 *%ptr
ret void
}
; Test v4i32 extraction from a variable element.
define void @f13(<4 x i32> %val, i32 *%ptr, i32 %index) {
; CHECK-LABEL: f13:
; CHECK-NOT: vstebrf
; CHECK: br %r14
%element = extractelement <4 x i32> %val, i32 %index
%swap = call i32 @llvm.bswap.i32(i32 %element)
store i32 %swap, i32 *%ptr
ret void
}
; Test v4i32 extraction using a vector bswap.
define void @f14(<4 x i32> %val, i32 *%ptr) {
; CHECK-LABEL: f14:
; CHECK: vstebrf %v24, 0(%r2), 0
; CHECK: br %r14
%swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
%element = extractelement <4 x i32> %swap, i32 0
store i32 %element, i32 *%ptr
ret void
}
; Test v2i64 extraction from the first element.
define void @f15(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f15:
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
%element = extractelement <2 x i64> %val, i32 0
%swap = call i64 @llvm.bswap.i64(i64 %element)
store i64 %swap, i64 *%ptr
ret void
}
; Test v2i64 extraction from the last element.
define void @f16(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f16:
; CHECK: vstebrg %v24, 0(%r2), 1
; CHECK: br %r14
%element = extractelement <2 x i64> %val, i32 1
%swap = call i64 @llvm.bswap.i64(i64 %element)
store i64 %swap, i64 *%ptr
ret void
}
; Test v2i64 extraction of an invalid element. This must compile,
; but we don't care what it does.
define void @f17(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f17:
; CHECK-NOT: vstebrg %v24, 0(%r2), 2
; CHECK: br %r14
%element = extractelement <2 x i64> %val, i32 2
%swap = call i64 @llvm.bswap.i64(i64 %element)
store i64 %swap, i64 *%ptr
ret void
}
; Test v2i64 extraction with the highest in-range offset.
define void @f18(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f18:
; CHECK: vstebrg %v24, 4088(%r2), 1
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 511
%element = extractelement <2 x i64> %val, i32 1
%swap = call i64 @llvm.bswap.i64(i64 %element)
store i64 %swap, i64 *%ptr
ret void
}
; Test v2i64 extraction with the first ouf-of-range offset.
define void @f19(<2 x i64> %val, i64 *%base) {
; CHECK-LABEL: f19:
; CHECK: aghi %r2, 4096
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 512
%element = extractelement <2 x i64> %val, i32 0
%swap = call i64 @llvm.bswap.i64(i64 %element)
store i64 %swap, i64 *%ptr
ret void
}
; Test v2i64 extraction from a variable element.
define void @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
; CHECK-LABEL: f20:
; CHECK-NOT: vstebrg
; CHECK: br %r14
%element = extractelement <2 x i64> %val, i32 %index
%swap = call i64 @llvm.bswap.i64(i64 %element)
store i64 %swap, i64 *%ptr
ret void
}
; Test v2i64 extraction using a vector bswap.
define void @f21(<2 x i64> %val, i64 *%ptr) {
; CHECK-LABEL: f21:
; CHECK: vstebrg %v24, 0(%r2), 0
; CHECK: br %r14
%swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
%element = extractelement <2 x i64> %swap, i32 0
store i64 %element, i64 *%ptr
ret void
}

View File

@ -0,0 +1,136 @@
; Test vector insertions of byte-swapped memory values into 0.
;
; VLLEBRZ loads one byte-reversed element and sets all remaining
; vector elements to zero, hence the insertion into zeroinitializer.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test VLLEBRZH.
define <8 x i16> @f1(i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vllebrzh %v24, 0(%r2)
; CHECK: br %r14
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
; Test VLLEBRZH using a vector bswap.
define <8 x i16> @f2(i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vllebrzh %v24, 0(%r2)
; CHECK: br %r14
%val = load i16, i16 *%ptr
%insert = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
ret <8 x i16> %ret
}
; Test VLLEBRZF.
define <4 x i32> @f3(i32 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vllebrzf %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %val)
%ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 1
ret <4 x i32> %ret
}
; Test VLLEBRZF using a vector bswap.
define <4 x i32> @f4(i32 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vllebrzf %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
ret <4 x i32> %ret
}
; Test VLLEBRZG.
define <2 x i64> @f5(i64 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vllebrzg %v24, 0(%r2)
; CHECK: br %r14
%val = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %val)
%ret = insertelement <2 x i64> zeroinitializer, i64 %swap, i32 0
ret <2 x i64> %ret
}
; Test VLLEBRZG using a vector bswap.
define <2 x i64> @f6(i64 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vllebrzg %v24, 0(%r2)
; CHECK: br %r14
%val = load i64, i64 *%ptr
%insert = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
ret <2 x i64> %ret
}
; Test VLLEBRZE.
; Unlike VLLEBRZF above, the word lands in element 0 rather than element 1.
define <4 x i32> @f7(i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: vllebrze %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %val)
%ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 0
ret <4 x i32> %ret
}
; Test VLLEBRZE using a vector bswap.
define <4 x i32> @f8(i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vllebrze %v24, 0(%r2)
; CHECK: br %r14
%val = load i32, i32 *%ptr
%insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
ret <4 x i32> %ret
}
; Test VLLEBRZH with the highest in-range offset.
; (2047 * 2 bytes = 4094, the largest halfword offset below 4096.)
define <8 x i16> @f9(i16 *%base) {
; CHECK-LABEL: f9:
; CHECK: vllebrzh %v24, 4094(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2047
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
; Test VLLEBRZH with the next highest offset.
; 4096 is out of range for the instruction's displacement field.
define <8 x i16> @f10(i16 *%base) {
; CHECK-LABEL: f10:
; CHECK-NOT: vllebrzh %v24, 4096(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2048
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}
; Test that VLLEBRZH allows an index.
define <8 x i16> @f11(i16 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
; CHECK: vllebrzh %v24, 0([[REG]],%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 %index
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
ret <8 x i16> %ret
}

View File

@ -0,0 +1,77 @@
; Test insertions of byte-swapped memory values into a nonzero index of an undef.
;
; Because the other elements are undef, any insertion index can be
; implemented with a byte-reversed replicating load (VLBRREP).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test v8i16 insertion into an undef, with an arbitrary index.
define <8 x i16> @f1(i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK-NEXT: br %r14
%val = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %val)
%ret = insertelement <8 x i16> undef, i16 %swap, i32 5
ret <8 x i16> %ret
}
; Test v8i16 insertion into an undef, using a vector bswap.
define <8 x i16> @f2(i16 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK-NEXT: br %r14
%val = load i16, i16 *%ptr
%insert = insertelement <8 x i16> undef, i16 %val, i32 5
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
ret <8 x i16> %ret
}
; Test v4i32 insertion into an undef, with an arbitrary index.
define <4 x i32> @f3(i32 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK-NEXT: br %r14
%val = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %val)
%ret = insertelement <4 x i32> undef, i32 %swap, i32 2
ret <4 x i32> %ret
}
; Test v4i32 insertion into an undef, using a vector bswap.
define <4 x i32> @f4(i32 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK-NEXT: br %r14
%val = load i32, i32 *%ptr
%insert = insertelement <4 x i32> undef, i32 %val, i32 2
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
ret <4 x i32> %ret
}
; Test v2i64 insertion into an undef, with an arbitrary index.
define <2 x i64> @f5(i64 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK-NEXT: br %r14
%val = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %val)
%ret = insertelement <2 x i64> undef, i64 %swap, i32 1
ret <2 x i64> %ret
}
; Test v2i64 insertion into an undef, using a vector bswap.
define <2 x i64> @f6(i64 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK-NEXT: br %r14
%val = load i64, i64 *%ptr
%insert = insertelement <2 x i64> undef, i64 %val, i32 1
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
ret <2 x i64> %ret
}

View File

@ -0,0 +1,192 @@
; Test replications of a byte-swapped scalar memory value.
;
; VLBRREP loads one byte-reversed scalar and replicates it into every
; element of the vector (insert into element 0 + splat shuffle below).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; Test a v8i16 replicating load with no offset.
define <8 x i16> @f1(i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test a v8i16 replicating load with the maximum in-range offset.
; (2047 * 2 bytes = 4094.)
define <8 x i16> @f2(i16 *%base) {
; CHECK-LABEL: f2:
; CHECK: vlbrreph %v24, 4094(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2047
%scalar = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test a v8i16 replicating load with the first out-of-range offset.
; The address must be adjusted separately (hence the aghi below).
define <8 x i16> @f3(i16 *%base) {
; CHECK-LABEL: f3:
; CHECK: aghi %r2, 4096
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%base, i64 2048
%scalar = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}
; Test a v8i16 replicating load using a vector bswap.
define <8 x i16> @f4(i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i16, i16 *%ptr
%val = insertelement <8 x i16> undef, i16 %scalar, i32 0
%rep = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
%ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
ret <8 x i16> %ret
}
; Test a v4i32 replicating load with no offset.
define <4 x i32> @f5(i32 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %scalar)
%val = insertelement <4 x i32> undef, i32 %swap, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test a v4i32 replicating load with the maximum in-range offset.
; (1023 * 4 bytes = 4092.)
define <4 x i32> @f6(i32 *%base) {
; CHECK-LABEL: f6:
; CHECK: vlbrrepf %v24, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i64 1023
%scalar = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %scalar)
%val = insertelement <4 x i32> undef, i32 %swap, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test a v4i32 replicating load with the first out-of-range offset.
define <4 x i32> @f7(i32 *%base) {
; CHECK-LABEL: f7:
; CHECK: aghi %r2, 4096
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i32, i32 *%base, i64 1024
%scalar = load i32, i32 *%ptr
%swap = call i32 @llvm.bswap.i32(i32 %scalar)
%val = insertelement <4 x i32> undef, i32 %swap, i32 0
%ret = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
ret <4 x i32> %ret
}
; Test a v4i32 replicating load using a vector bswap.
define <4 x i32> @f8(i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i32, i32 *%ptr
%val = insertelement <4 x i32> undef, i32 %scalar, i32 0
%rep = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> zeroinitializer
%ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
ret <4 x i32> %ret
}
; Test a v2i64 replicating load with no offset.
define <2 x i64> @f9(i64 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %scalar)
%val = insertelement <2 x i64> undef, i64 %swap, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test a v2i64 replicating load with the maximum in-range offset.
; (511 * 8 bytes = 4088.)
define <2 x i64> @f10(i64 *%base) {
; CHECK-LABEL: f10:
; CHECK: vlbrrepg %v24, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 511
%scalar = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %scalar)
%val = insertelement <2 x i64> undef, i64 %swap, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test a v2i64 replicating load with the first out-of-range offset.
define <2 x i64> @f11(i64 *%base) {
; CHECK-LABEL: f11:
; CHECK: aghi %r2, 4096
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i64, i64 *%base, i32 512
%scalar = load i64, i64 *%ptr
%swap = call i64 @llvm.bswap.i64(i64 %scalar)
%val = insertelement <2 x i64> undef, i64 %swap, i32 0
%ret = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
ret <2 x i64> %ret
}
; Test a v2i64 replicating load using a vector bswap.
define <2 x i64> @f12(i64 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK: br %r14
%scalar = load i64, i64 *%ptr
%val = insertelement <2 x i64> undef, i64 %scalar, i32 0
%rep = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> zeroinitializer
%ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
ret <2 x i64> %ret
}
; Test a v8i16 replicating load with an index.
; The constant part of the address folds into the displacement.
define <8 x i16> @f13(i16 *%base, i64 %index) {
; CHECK-LABEL: f13:
; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
; CHECK: vlbrreph %v24, 2046([[REG]],%r2)
; CHECK: br %r14
%ptr1 = getelementptr i16, i16 *%base, i64 %index
%ptr = getelementptr i16, i16 *%ptr1, i64 1023
%scalar = load i16, i16 *%ptr
%swap = call i16 @llvm.bswap.i16(i16 %scalar)
%val = insertelement <8 x i16> undef, i16 %swap, i32 0
%ret = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> zeroinitializer
ret <8 x i16> %ret
}

View File

@ -0,0 +1,40 @@
; Test conversions between integer and float elements on arch13.
;
; NOTE(review): the IR value is named %dwords, but the elements here are
; 32-bit words (<4 x i32>); the name appears to be copied from the
; 64-bit variants of these tests.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
; Test conversion of f32s to signed i32s.
; (The trailing operands are the instruction's mask fields; 5 is
; presumably the round-toward-zero mode required for fpto[su]i —
; verify against the ISA description.)
define <4 x i32> @f1(<4 x float> %floats) {
; CHECK-LABEL: f1:
; CHECK: vcfeb %v24, %v24, 0, 5
; CHECK: br %r14
%dwords = fptosi <4 x float> %floats to <4 x i32>
ret <4 x i32> %dwords
}
; Test conversion of f32s to unsigned i32s.
define <4 x i32> @f2(<4 x float> %floats) {
; CHECK-LABEL: f2:
; CHECK: vclfeb %v24, %v24, 0, 5
; CHECK: br %r14
%dwords = fptoui <4 x float> %floats to <4 x i32>
ret <4 x i32> %dwords
}
; Test conversion of signed i32s to f32s.
define <4 x float> @f3(<4 x i32> %dwords) {
; CHECK-LABEL: f3:
; CHECK: vcefb %v24, %v24, 0, 0
; CHECK: br %r14
%floats = sitofp <4 x i32> %dwords to <4 x float>
ret <4 x float> %floats
}
; Test conversion of unsigned i32s to f32s.
define <4 x float> @f4(<4 x i32> %dwords) {
; CHECK-LABEL: f4:
; CHECK: vcelfb %v24, %v24, 0, 0
; CHECK: br %r14
%floats = uitofp <4 x i32> %dwords to <4 x float>
ret <4 x float> %floats
}

View File

@ -0,0 +1,138 @@
; Test loads of byte-swapped vector elements.
;
; For v16i8 an element reversal is a full byte reversal (VLBRQ);
; wider element types use the element-reversing loads (VLERH/F/G).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
; Test v16i8 loads.
define <16 x i8> @f1(<16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrq %v24, 0(%r2)
; CHECK: br %r14
%load = load <16 x i8>, <16 x i8> *%ptr
%ret = shufflevector <16 x i8> %load, <16 x i8> undef,
<16 x i32> <i32 15, i32 14, i32 13, i32 12,
i32 11, i32 10, i32 9, i32 8,
i32 7, i32 6, i32 5, i32 4,
i32 3, i32 2, i32 1, i32 0>
ret <16 x i8> %ret
}
; Test v8i16 loads.
define <8 x i16> @f2(<8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vlerh %v24, 0(%r2)
; CHECK: br %r14
%load = load <8 x i16>, <8 x i16> *%ptr
%ret = shufflevector <8 x i16> %load, <8 x i16> undef,
<8 x i32> <i32 7, i32 6, i32 5, i32 4,
i32 3, i32 2, i32 1, i32 0>
ret <8 x i16> %ret
}
; Test v4i32 loads.
define <4 x i32> @f3(<4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %ret
}
; Test v2i64 loads.
define <2 x i64> @f4(<2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlerg %v24, 0(%r2)
; CHECK: br %r14
%load = load <2 x i64>, <2 x i64> *%ptr
%ret = shufflevector <2 x i64> %load, <2 x i64> undef,
<2 x i32> <i32 1, i32 0>
ret <2 x i64> %ret
}
; Test v4f32 loads.
define <4 x float> @f5(<4 x float> *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
%load = load <4 x float>, <4 x float> *%ptr
%ret = shufflevector <4 x float> %load, <4 x float> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %ret
}
; Test v2f64 loads.
define <2 x double> @f6(<2 x double> *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vlerg %v24, 0(%r2)
; CHECK: br %r14
%load = load <2 x double>, <2 x double> *%ptr
%ret = shufflevector <2 x double> %load, <2 x double> undef,
<2 x i32> <i32 1, i32 0>
ret <2 x double> %ret
}
; Test the highest aligned in-range offset.
; (255 * 16 bytes = 4080.)
define <4 x i32> @f7(<4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: vlerf %v24, 4080(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %ret
}
; Test the highest unaligned in-range offset.
define <4 x i32> @f8(i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vlerf %v24, 4095(%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 4095
%ptr = bitcast i8 *%addr to <4 x i32> *
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %ret
}
; Test the next offset up, which requires separate address logic.
define <4 x i32> @f9(<4 x i32> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %ret
}
; Test negative offsets, which also require separate address logic.
define <4 x i32> @f10(<4 x i32> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %ret
}
; Check that indexes are allowed.
define <4 x i32> @f11(i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vlerf %v24, 0(%r3,%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 %index
%ptr = bitcast i8 *%addr to <4 x i32> *
%load = load <4 x i32>, <4 x i32> *%ptr
%ret = shufflevector <4 x i32> %load, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %ret
}

View File

@ -0,0 +1,138 @@
; Test stores of element-swapped vector elements.
;
; For v16i8 an element reversal is a full byte reversal (VSTBRQ);
; wider element types use the element-reversing stores (VSTERH/F/G).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
; Test v16i8 stores.
define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstbrq %v24, 0(%r2)
; CHECK: br %r14
%swap = shufflevector <16 x i8> %val, <16 x i8> undef,
<16 x i32> <i32 15, i32 14, i32 13, i32 12,
i32 11, i32 10, i32 9, i32 8,
i32 7, i32 6, i32 5, i32 4,
i32 3, i32 2, i32 1, i32 0>
store <16 x i8> %swap, <16 x i8> *%ptr
ret void
}
; Test v8i16 stores.
define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vsterh %v24, 0(%r2)
; CHECK: br %r14
%swap = shufflevector <8 x i16> %val, <8 x i16> undef,
<8 x i32> <i32 7, i32 6, i32 5, i32 4,
i32 3, i32 2, i32 1, i32 0>
store <8 x i16> %swap, <8 x i16> *%ptr
ret void
}
; Test v4i32 stores.
define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
%swap = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Test v2i64 stores.
define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
%swap = shufflevector <2 x i64> %val, <2 x i64> undef,
<2 x i32> <i32 1, i32 0>
store <2 x i64> %swap, <2 x i64> *%ptr
ret void
}
; Test v4f32 stores.
define void @f5(<4 x float> %val, <4 x float> *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
%swap = shufflevector <4 x float> %val, <4 x float> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x float> %swap, <4 x float> *%ptr
ret void
}
; Test v2f64 stores.
define void @f6(<2 x double> %val, <2 x double> *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
%swap = shufflevector <2 x double> %val, <2 x double> undef,
<2 x i32> <i32 1, i32 0>
store <2 x double> %swap, <2 x double> *%ptr
ret void
}
; Test the highest aligned in-range offset.
; (255 * 16 bytes = 4080.)
define void @f7(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: vsterf %v24, 4080(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
%swap = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Test the highest unaligned in-range offset.
define void @f8(<4 x i32> %val, i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vsterf %v24, 4095(%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 4095
%ptr = bitcast i8 *%addr to <4 x i32> *
%swap = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x i32> %swap, <4 x i32> *%ptr, align 1
ret void
}
; Test the next offset up, which requires separate address logic.
define void @f9(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
%swap = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Test negative offsets, which also require separate address logic.
define void @f10(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
%swap = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x i32> %swap, <4 x i32> *%ptr
ret void
}
; Check that indexes are allowed.
define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vsterf %v24, 0(%r3,%r2)
; CHECK: br %r14
%addr = getelementptr i8, i8 *%base, i64 %index
%ptr = bitcast i8 *%addr to <4 x i32> *
%swap = shufflevector <4 x i32> %val, <4 x i32> undef,
<4 x i32> <i32 3, i32 2, i32 1, i32 0>
store <4 x i32> %swap, <4 x i32> *%ptr, align 1
ret void
}

View File

@ -0,0 +1,154 @@
; Test vector intrinsics added with arch13.
;
; The vstrs* intrinsics return both a result vector and the condition
; code; the CHECK lines expect the CC to be extracted with ipm/srl 28
; and stored through the i32 pointer argument.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
declare {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8>, <16 x i8>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16>, <8 x i16>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32>, <4 x i32>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8>, <16 x i8>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16>, <8 x i16>, <16 x i8>)
declare {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32>, <4 x i32>, <16 x i8>)
; VSLD with the minimum useful value.
define <16 x i8> @test_vsld_1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsld_1:
; CHECK: vsld %v24, %v24, %v26, 1
; CHECK: br %r14
%res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 1)
ret <16 x i8> %res
}
; VSLD with the maximum value.
define <16 x i8> @test_vsld_7(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsld_7:
; CHECK: vsld %v24, %v24, %v26, 7
; CHECK: br %r14
%res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 7)
ret <16 x i8> %res
}
; VSRD with the minimum useful value.
define <16 x i8> @test_vsrd_1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsrd_1:
; CHECK: vsrd %v24, %v24, %v26, 1
; CHECK: br %r14
%res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 1)
ret <16 x i8> %res
}
; VSRD with the maximum value.
define <16 x i8> @test_vsrd_7(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsrd_7:
; CHECK: vsrd %v24, %v24, %v26, 7
; CHECK: br %r14
%res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 7)
ret <16 x i8> %res
}
; VSTRSB.
define <16 x i8> @test_vstrsb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
i32 *%ccptr) {
; CHECK-LABEL: test_vstrsb:
; CHECK: vstrsb %v24, %v24, %v26, %v28, 0
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8> %a, <16 x i8> %b,
<16 x i8> %c)
%res = extractvalue {<16 x i8>, i32} %call, 0
%cc = extractvalue {<16 x i8>, i32} %call, 1
store i32 %cc, i32 *%ccptr
ret <16 x i8> %res
}
; VSTRSH.
define <16 x i8> @test_vstrsh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
i32 *%ccptr) {
; CHECK-LABEL: test_vstrsh:
; CHECK: vstrsh %v24, %v24, %v26, %v28, 0
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16> %a, <8 x i16> %b,
<16 x i8> %c)
%res = extractvalue {<16 x i8>, i32} %call, 0
%cc = extractvalue {<16 x i8>, i32} %call, 1
store i32 %cc, i32 *%ccptr
ret <16 x i8> %res
}
; VSTRSF.
define <16 x i8> @test_vstrsf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c,
i32 *%ccptr) {
; CHECK-LABEL: test_vstrsf:
; CHECK: vstrsf %v24, %v24, %v26, %v28, 0
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32> %a, <4 x i32> %b,
<16 x i8> %c)
%res = extractvalue {<16 x i8>, i32} %call, 0
%cc = extractvalue {<16 x i8>, i32} %call, 1
store i32 %cc, i32 *%ccptr
ret <16 x i8> %res
}
; VSTRSZB.
define <16 x i8> @test_vstrszb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
i32 *%ccptr) {
; CHECK-LABEL: test_vstrszb:
; CHECK: vstrszb %v24, %v24, %v26, %v28
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8> %a, <16 x i8> %b,
<16 x i8> %c)
%res = extractvalue {<16 x i8>, i32} %call, 0
%cc = extractvalue {<16 x i8>, i32} %call, 1
store i32 %cc, i32 *%ccptr
ret <16 x i8> %res
}
; VSTRSZH.
define <16 x i8> @test_vstrszh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
i32 *%ccptr) {
; CHECK-LABEL: test_vstrszh:
; CHECK: vstrszh %v24, %v24, %v26, %v28
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16> %a, <8 x i16> %b,
<16 x i8> %c)
%res = extractvalue {<16 x i8>, i32} %call, 0
%cc = extractvalue {<16 x i8>, i32} %call, 1
store i32 %cc, i32 *%ccptr
ret <16 x i8> %res
}
; VSTRSZF.
define <16 x i8> @test_vstrszf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c,
i32 *%ccptr) {
; CHECK-LABEL: test_vstrszf:
; CHECK: vstrszf %v24, %v24, %v26, %v28
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32> %a, <4 x i32> %b,
<16 x i8> %c)
%res = extractvalue {<16 x i8>, i32} %call, 0
%cc = extractvalue {<16 x i8>, i32} %call, 1
store i32 %cc, i32 *%ccptr
ret <16 x i8> %res
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,881 @@
# For arch13 only.
# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch13 < %s 2> %t
# RUN: FileCheck < %t %s
#CHECK: error: invalid register pair
#CHECK: dfltcc %r1, %r2, %r4
#CHECK: error: invalid register pair
#CHECK: dfltcc %r2, %r1, %r4
dfltcc %r1, %r2, %r4
dfltcc %r2, %r1, %r4
#CHECK: error: invalid register pair
#CHECK: kdsa %r0, %r1
kdsa %r0, %r1
#CHECK: error: invalid operand
#CHECK: ldrv %f0, -1
#CHECK: error: invalid operand
#CHECK: ldrv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: ldrv %f0, 0(%v1,%r2)
ldrv %f0, -1
ldrv %f0, 4096
ldrv %f0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: lerv %f0, -1
#CHECK: error: invalid operand
#CHECK: lerv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: lerv %f0, 0(%v1,%r2)
lerv %f0, -1
lerv %f0, 4096
lerv %f0, 0(%v1,%r2)
#CHECK: error: invalid use of indexed addressing
#CHECK: mvcrl 160(%r1,%r15),160(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl -1(%r1),160(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl 4096(%r1),160(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl 0(%r1),-1(%r15)
#CHECK: error: invalid operand
#CHECK: mvcrl 0(%r1),4096(%r15)
mvcrl 160(%r1,%r15),160(%r15)
mvcrl -1(%r1),160(%r15)
mvcrl 4096(%r1),160(%r15)
mvcrl 0(%r1),-1(%r15)
mvcrl 0(%r1),4096(%r15)
#CHECK: error: invalid operand
#CHECK: popcnt %r2, %r4, -1
#CHECK: error: invalid operand
#CHECK: popcnt %r2, %r4, 16
popcnt %r2, %r4, -1
popcnt %r2, %r4, 16
#CHECK: error: invalid operand
#CHECK: selgr %r0, %r0, %r0, -1
#CHECK: error: invalid operand
#CHECK: selgr %r0, %r0, %r0, 16
selgr %r0, %r0, %r0, -1
selgr %r0, %r0, %r0, 16
#CHECK: error: invalid operand
#CHECK: selfhr %r0, %r0, %r0, -1
#CHECK: error: invalid operand
#CHECK: selfhr %r0, %r0, %r0, 16
selfhr %r0, %r0, %r0, -1
selfhr %r0, %r0, %r0, 16
#CHECK: error: invalid operand
#CHECK: selr %r0, %r0, %r0, -1
#CHECK: error: invalid operand
#CHECK: selr %r0, %r0, %r0, 16
selr %r0, %r0, %r0, -1
selr %r0, %r0, %r0, 16
#CHECK: error: invalid register pair
#CHECK: sortl %r1, %r2
#CHECK: error: invalid register pair
#CHECK: sortl %r2, %r1
sortl %r1, %r2
sortl %r2, %r1
#CHECK: error: invalid operand
#CHECK: stdrv %f0, -1
#CHECK: error: invalid operand
#CHECK: stdrv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: stdrv %f0, 0(%v1,%r2)
stdrv %f0, -1
stdrv %f0, 4096
stdrv %f0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: sterv %f0, -1
#CHECK: error: invalid operand
#CHECK: sterv %f0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: sterv %f0, 0(%v1,%r2)
sterv %f0, -1
sterv %f0, 4096
sterv %f0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcefb %v0, %v0, 16, 0
vcefb %v0, %v0, 0, -1
vcefb %v0, %v0, 0, 16
vcefb %v0, %v0, -1, 0
vcefb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcelfb %v0, %v0, 16, 0
vcelfb %v0, %v0, 0, -1
vcelfb %v0, %v0, 0, 16
vcelfb %v0, %v0, -1, 0
vcelfb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcfeb %v0, %v0, 16, 0
vcfeb %v0, %v0, 0, -1
vcfeb %v0, %v0, 0, 16
vcfeb %v0, %v0, -1, 0
vcfeb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vcfpl %v0, %v0, 16, 0, 0
vcfpl %v0, %v0, 0, 0, -1
vcfpl %v0, %v0, 0, 0, 16
vcfpl %v0, %v0, 0, -1, 0
vcfpl %v0, %v0, 0, 16, 0
vcfpl %v0, %v0, -1, 0, 0
vcfpl %v0, %v0, 16, 0, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vcfps %v0, %v0, 16, 0, 0
vcfps %v0, %v0, 0, 0, -1
vcfps %v0, %v0, 0, 0, 16
vcfps %v0, %v0, 0, -1, 0
vcfps %v0, %v0, 0, 16, 0
vcfps %v0, %v0, -1, 0, 0
vcfps %v0, %v0, 16, 0, 0
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vclfeb %v0, %v0, 16, 0
vclfeb %v0, %v0, 0, -1
vclfeb %v0, %v0, 0, 16
vclfeb %v0, %v0, -1, 0
vclfeb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vclfp %v0, %v0, 16, 0, 0
vclfp %v0, %v0, 0, 0, -1
vclfp %v0, %v0, 0, 0, 16
vclfp %v0, %v0, 0, -1, 0
vclfp %v0, %v0, 0, 16, 0
vclfp %v0, %v0, -1, 0, 0
vclfp %v0, %v0, 16, 0, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, -1, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 0, 16, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, -1, 0, 0
#CHECK: error: invalid operand
#CHECK: vcsfp %v0, %v0, 16, 0, 0
vcsfp %v0, %v0, 0, 0, -1
vcsfp %v0, %v0, 0, 0, 16
vcsfp %v0, %v0, 0, -1, 0
vcsfp %v0, %v0, 0, 16, 0
vcsfp %v0, %v0, -1, 0, 0
vcsfp %v0, %v0, 16, 0, 0
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 0, 16
vcvb %r0, %v0, 0, -1
vcvb %r0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 0, 16
vcvbg %r0, %v0, 0, -1
vcvbg %r0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vlbr %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlbr %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vlbr %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlbr %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlbr %v0, 0(%v1,%r2), 0
vlbr %v0, 0, -1
vlbr %v0, 0, 16
vlbr %v0, -1, 0
vlbr %v0, 4096, 0
vlbr %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vlbrf %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrf %v0, 0(%v1,%r2)
vlbrf %v0, -1
vlbrf %v0, 4096
vlbrf %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlbrg %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrg %v0, 0(%v1,%r2)
vlbrg %v0, -1
vlbrg %v0, 4096
vlbrg %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlbrh %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrh %v0, 0(%v1,%r2)
vlbrh %v0, -1
vlbrh %v0, 4096
vlbrh %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlbrq %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrq %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrq %v0, 0(%v1,%r2)
vlbrq %v0, -1
vlbrq %v0, 4096
vlbrq %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlbrrep %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrrep %v0, 0(%v1,%r2), 0
vlbrrep %v0, 0, -1
vlbrrep %v0, 0, 16
vlbrrep %v0, -1, 0
vlbrrep %v0, 4096, 0
vlbrrep %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vlbrrepf %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrrepf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrrepf %v0, 0(%v1,%r2)
vlbrrepf %v0, -1
vlbrrepf %v0, 4096
vlbrrepf %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlbrrepg %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrrepg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrrepg %v0, 0(%v1,%r2)
vlbrrepg %v0, -1
vlbrrepg %v0, 4096
vlbrrepg %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlbrreph %v0, -1
#CHECK: error: invalid operand
#CHECK: vlbrreph %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlbrreph %v0, 0(%v1,%r2)
vlbrreph %v0, -1
vlbrreph %v0, 4096
vlbrreph %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, 0, 4
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlebrf %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlebrf %v0, 0(%v1,%r2), 0
vlebrf %v0, 0, -1
vlebrf %v0, 0, 4
vlebrf %v0, -1, 0
vlebrf %v0, 4096, 0
vlebrf %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, 0, 2
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlebrg %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlebrg %v0, 0(%v1,%r2), 0
vlebrg %v0, 0, -1
vlebrg %v0, 0, 2
vlebrg %v0, -1, 0
vlebrg %v0, 4096, 0
vlebrg %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, 0, 8
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vlebrh %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vlebrh %v0, 0(%v1,%r2), 0
vlebrh %v0, 0, -1
vlebrh %v0, 0, 8
vlebrh %v0, -1, 0
vlebrh %v0, 4096, 0
vlebrh %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vler %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vler %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vler %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vler %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vler %v0, 0(%v1,%r2), 0
vler %v0, 0, -1
vler %v0, 0, 16
vler %v0, -1, 0
vler %v0, 4096, 0
vler %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vlerf %v0, -1
#CHECK: error: invalid operand
#CHECK: vlerf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlerf %v0, 0(%v1,%r2)
vlerf %v0, -1
vlerf %v0, 4096
vlerf %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlerg %v0, -1
#CHECK: error: invalid operand
#CHECK: vlerg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlerg %v0, 0(%v1,%r2)
vlerg %v0, -1
vlerg %v0, 4096
vlerg %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vlerh %v0, -1
#CHECK: error: invalid operand
#CHECK: vlerh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vlerh %v0, 0(%v1,%r2)
vlerh %v0, -1
vlerh %v0, 4096
vlerh %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vllebrz %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrz %v0, 0(%v1,%r2), 0
vllebrz %v0, 0, -1
vllebrz %v0, 0, 16
vllebrz %v0, -1, 0
vllebrz %v0, 4096, 0
vllebrz %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vllebrze %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrze %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrze %v0, 0(%v1,%r2)
vllebrze %v0, -1
vllebrze %v0, 4096
vllebrze %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vllebrzf %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrzf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrzf %v0, 0(%v1,%r2)
vllebrzf %v0, -1
vllebrzf %v0, 4096
vllebrzf %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vllebrzg %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrzg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrzg %v0, 0(%v1,%r2)
vllebrzg %v0, -1
vllebrzg %v0, 4096
vllebrzg %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vllebrzh %v0, -1
#CHECK: error: invalid operand
#CHECK: vllebrzh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vllebrzh %v0, 0(%v1,%r2)
vllebrzh %v0, -1
vllebrzh %v0, 4096
vllebrzh %v0, 0(%v1,%r2)
# vsld/vsrd (vector shift left/right double by bit): the shift amount is
# encoded in an 8-bit immediate field, so the assembler accepts 0-255 and
# rejects anything outside that range.
#CHECK: error: invalid operand
#CHECK: vsld %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vsld %v0, %v0, %v0, 256
vsld %v0, %v0, %v0, -1
vsld %v0, %v0, %v0, 256
#CHECK: error: invalid operand
#CHECK: vsrd %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vsrd %v0, %v0, %v0, 256
vsrd %v0, %v0, %v0, -1
vsrd %v0, %v0, %v0, 256
#CHECK: error: invalid operand
#CHECK: vstbr %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstbr %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vstbr %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstbr %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstbr %v0, 0(%v1,%r2), 0
vstbr %v0, 0, -1
vstbr %v0, 0, 16
vstbr %v0, -1, 0
vstbr %v0, 4096, 0
vstbr %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vstbrf %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrf %v0, 0(%v1,%r2)
vstbrf %v0, -1
vstbrf %v0, 4096
vstbrf %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vstbrg %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrg %v0, 0(%v1,%r2)
vstbrg %v0, -1
vstbrg %v0, 4096
vstbrg %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vstbrh %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrh %v0, 0(%v1,%r2)
vstbrh %v0, -1
vstbrh %v0, 4096
vstbrh %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vstbrq %v0, -1
#CHECK: error: invalid operand
#CHECK: vstbrq %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vstbrq %v0, 0(%v1,%r2)
vstbrq %v0, -1
vstbrq %v0, 4096
vstbrq %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, 0, 4
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstebrf %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstebrf %v0, 0(%v1,%r2), 0
vstebrf %v0, 0, -1
vstebrf %v0, 0, 4
vstebrf %v0, -1, 0
vstebrf %v0, 4096, 0
vstebrf %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, 0, 2
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstebrg %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstebrg %v0, 0(%v1,%r2), 0
vstebrg %v0, 0, -1
vstebrg %v0, 0, 2
vstebrg %v0, -1, 0
vstebrg %v0, 4096, 0
vstebrg %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, 0, 8
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstebrh %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vstebrh %v0, 0(%v1,%r2), 0
vstebrh %v0, 0, -1
vstebrh %v0, 0, 8
vstebrh %v0, -1, 0
vstebrh %v0, 4096, 0
vstebrh %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vster %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vster %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vster %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vster %v0, 4096, 0
#CHECK: error: invalid use of vector addressing
#CHECK: vster %v0, 0(%v1,%r2), 0
vster %v0, 0, -1
vster %v0, 0, 16
vster %v0, -1, 0
vster %v0, 4096, 0
vster %v0, 0(%v1,%r2), 0
#CHECK: error: invalid operand
#CHECK: vsterf %v0, -1
#CHECK: error: invalid operand
#CHECK: vsterf %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vsterf %v0, 0(%v1,%r2)
vsterf %v0, -1
vsterf %v0, 4096
vsterf %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vsterg %v0, -1
#CHECK: error: invalid operand
#CHECK: vsterg %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vsterg %v0, 0(%v1,%r2)
vsterg %v0, -1
vsterg %v0, 4096
vsterg %v0, 0(%v1,%r2)
#CHECK: error: invalid operand
#CHECK: vsterh %v0, -1
#CHECK: error: invalid operand
#CHECK: vsterh %v0, 4096
#CHECK: error: invalid use of vector addressing
#CHECK: vsterh %v0, 0(%v1,%r2)
vsterh %v0, -1
vsterh %v0, 4096
vsterh %v0, 0(%v1,%r2)
# vstrs (vector string search): the two trailing mask operands are 4-bit
# fields, so 16 and -1 are rejected.  The generic mnemonic requires the
# element-size mask explicitly (too few operands without it) and rejects
# a surplus seventh operand.
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 16, 0
#CHECK: error: too few operands
#CHECK: vstrs %v0, %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0, 0
vstrs %v0, %v0, %v0, %v0, 0, -1
vstrs %v0, %v0, %v0, %v0, 0, 16
vstrs %v0, %v0, %v0, %v0, -1, 0
vstrs %v0, %v0, %v0, %v0, 16, 0
vstrs %v0, %v0, %v0, %v0
vstrs %v0, %v0, %v0, %v0, 0, 0, 0
#CHECK: error: invalid operand
#CHECK: vstrsb %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrsb %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrsb %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrsb %v0, %v0, %v0, %v0, 0, 0
vstrsb %v0, %v0, %v0, %v0, -1
vstrsb %v0, %v0, %v0, %v0, 16
vstrsb %v0, %v0, %v0
vstrsb %v0, %v0, %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: vstrsf %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrsf %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrsf %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrsf %v0, %v0, %v0, %v0, 0, 0
vstrsf %v0, %v0, %v0, %v0, -1
vstrsf %v0, %v0, %v0, %v0, 16
vstrsf %v0, %v0, %v0
vstrsf %v0, %v0, %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: vstrsh %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrsh %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrsh %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrsh %v0, %v0, %v0, %v0, 0, 0
vstrsh %v0, %v0, %v0, %v0, -1
vstrsh %v0, %v0, %v0, %v0, 16
vstrsh %v0, %v0, %v0
vstrsh %v0, %v0, %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: vstrszb %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrszb %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrszb %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrszb %v0, %v0, %v0, %v0, 0, 0
vstrszb %v0, %v0, %v0, %v0, -1
vstrszb %v0, %v0, %v0, %v0, 16
vstrszb %v0, %v0, %v0
vstrszb %v0, %v0, %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: vstrszf %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrszf %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrszf %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrszf %v0, %v0, %v0, %v0, 0, 0
vstrszf %v0, %v0, %v0, %v0, -1
vstrszf %v0, %v0, %v0, %v0, 16
vstrszf %v0, %v0, %v0
vstrszf %v0, %v0, %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: vstrszh %v0, %v0, %v0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vstrszh %v0, %v0, %v0, %v0, 16
#CHECK: error: too few operands
#CHECK: vstrszh %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vstrszh %v0, %v0, %v0, %v0, 0, 0
vstrszh %v0, %v0, %v0, %v0, -1
vstrszh %v0, %v0, %v0, %v0, 16
vstrszh %v0, %v0, %v0
vstrszh %v0, %v0, %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wcefb %v0, %v0, 16, 0
wcefb %v0, %v0, 0, -1
wcefb %v0, %v0, 0, 16
wcefb %v0, %v0, -1, 0
wcefb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wcelfb %v0, %v0, 16, 0
wcelfb %v0, %v0, 0, -1
wcelfb %v0, %v0, 0, 16
wcelfb %v0, %v0, -1, 0
wcelfb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wcfeb %v0, %v0, 16, 0
wcfeb %v0, %v0, 0, -1
wcfeb %v0, %v0, 0, 16
wcfeb %v0, %v0, -1, 0
wcfeb %v0, %v0, 16, 0
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, 0, -1
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, 0, 16
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, -1, 0
#CHECK: error: invalid operand
#CHECK: wclfeb %v0, %v0, 16, 0
wclfeb %v0, %v0, 0, -1
wclfeb %v0, %v0, 0, 16
wclfeb %v0, %v0, -1, 0
wclfeb %v0, %v0, 16, 0

View File

@@ -34,6 +34,16 @@
agh %r0, -524289
agh %r0, 524288
#CHECK: error: instruction requires: deflate-conversion
#CHECK: dfltcc %r2, %r4, %r6
dfltcc %r2, %r4, %r6
#CHECK: error: instruction requires: message-security-assist-extension9
#CHECK: kdsa %r0, %r2
kdsa %r0, %r2
#CHECK: error: invalid register pair
#CHECK: kma %r1, %r2, %r4
#CHECK: error: invalid register pair
@@ -109,6 +119,66 @@
msgc %r0, -524289
msgc %r0, 524288
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: mvcrl 0, 0
mvcrl 0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ncgrk %r0, %r0, %r0
ncgrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ncrk %r0, %r0, %r0
ncrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nngrk %r0, %r0, %r0
nngrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nnrk %r0, %r0, %r0
nnrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nogrk %r0, %r0, %r0
nogrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nork %r0, %r0, %r0
nork %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nxgrk %r0, %r0, %r0
nxgrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: nxrk %r0, %r0, %r0
nxrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ocgrk %r0, %r0, %r0
ocgrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: ocrk %r0, %r0, %r0
ocrk %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: popcnt %r2, %r4, 1
popcnt %r2, %r4, 1
#CHECK: error: invalid register pair
#CHECK: prno %r1, %r2
#CHECK: error: invalid register pair
@@ -117,6 +187,30 @@
prno %r1, %r2
prno %r2, %r1
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selgr %r0, %r0, %r0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selgre %r0, %r0, %r0
selgr %r0, %r0, %r0, 0
selgre %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selfhr %r0, %r0, %r0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selfhre %r0, %r0, %r0
selfhr %r0, %r0, %r0, 0
selfhre %r0, %r0, %r0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selr %r0, %r0, %r0, 0
#CHECK: error: instruction requires: miscellaneous-extensions-3
#CHECK: selre %r0, %r0, %r0
selr %r0, %r0, %r0, 0
selre %r0, %r0, %r0
#CHECK: error: invalid operand
#CHECK: sgh %r0, -524289
#CHECK: error: invalid operand
@@ -125,6 +219,11 @@
sgh %r0, -524289
sgh %r0, 524288
#CHECK: error: instruction requires: enhanced-sort
#CHECK: sortl %r2, %r4
sortl %r2, %r4
#CHECK: error: invalid operand
#CHECK: stgsc %r0, -524289
#CHECK: error: invalid operand
@@ -147,6 +246,41 @@
vap %v0, %v0, %v0, -1, 0
vap %v0, %v0, %v0, 256, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcefb %v0, %v0, 0, 0
vcefb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcelfb %v0, %v0, 0, 0
vcelfb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcfeb %v0, %v0, 0, 0
vcfeb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcfpl %v0, %v0, 0, 0, 0
vcfpl %v0, %v0, 0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcfps %v0, %v0, 0, 0, 0
vcfps %v0, %v0, 0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vclfeb %v0, %v0, 0, 0
vclfeb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vclfp %v0, %v0, 0, 0, 0
vclfp %v0, %v0, 0, 0, 0
#CHECK: error: invalid operand
#CHECK: vcp %v0, %v0, -1
#CHECK: error: invalid operand
@@ -155,21 +289,32 @@
vcp %v0, %v0, -1
vcp %v0, %v0, 16
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vcsfp %v0, %v0, 0, 0, 0
vcsfp %v0, %v0, 0, 0, 0
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 16
#CHECK: error: instruction requires: vector-packed-decimal-enhancement
#CHECK: vcvb %r0, %v0, 0, 1
vcvb %r0, %v0, -1
vcvb %r0, %v0, 16
vcvb %r0, %v0, 0, 1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 16
#CHECK: error: instruction requires: vector-packed-decimal-enhancement
#CHECK: vcvbg %r0, %v0, 0, 1
vcvbg %r0, %v0, -1
vcvbg %r0, %v0, 16
vcvbg %r0, %v0, 0, 1
#CHECK: error: invalid operand
#CHECK: vcvd %r0, %v0, 0, -1
@@ -408,6 +553,79 @@
vllezlf %v0, 4096
vllezlf %v0, 0(%v1,%r2)
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbr %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrh %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrq %v0, 0
vlbr %v0, 0, 0
vlbrf %v0, 0
vlbrg %v0, 0
vlbrh %v0, 0
vlbrq %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrrep %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrrepf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrrepg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlbrreph %v0, 0
vlbrrep %v0, 0, 0
vlbrrepf %v0, 0
vlbrrepg %v0, 0
vlbrreph %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlebrf %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlebrg %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlebrh %v0, 0, 0
vlebrf %v0, 0, 0
vlebrg %v0, 0, 0
vlebrh %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vler %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlerf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlerg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vlerh %v0, 0
vler %v0, 0, 0
vlerf %v0, 0
vlerg %v0, 0
vlerh %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrz %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrze %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrzf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrzg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vllebrzh %v0, 0
vllebrz %v0, 0, 0
vllebrze %v0, 0
vllebrzf %v0, 0
vllebrzg %v0, 0
vllebrzh %v0, 0
#CHECK: error: invalid operand
#CHECK: vlrl %v0, 0, -1
#CHECK: error: invalid operand
@@ -551,6 +769,11 @@
vsdp %v0, %v0, %v0, -1, 0
vsdp %v0, %v0, %v0, 256, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsld %v0, %v0, %v0, 0
vsld %v0, %v0, %v0, 0
#CHECK: error: invalid operand
#CHECK: vsp %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand
@@ -565,6 +788,11 @@
vsp %v0, %v0, %v0, -1, 0
vsp %v0, %v0, %v0, 256, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsrd %v0, %v0, %v0, 0
vsrd %v0, %v0, %v0, 0
#CHECK: error: invalid operand
#CHECK: vsrp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
@@ -585,6 +813,48 @@
vsrp %v0, %v0, -1, 0, 0
vsrp %v0, %v0, 256, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbr %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrh %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstbrq %v0, 0
vstbr %v0, 0, 0
vstbrf %v0, 0
vstbrg %v0, 0
vstbrh %v0, 0
vstbrq %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstebrf %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstebrg %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstebrh %v0, 0, 0
vstebrf %v0, 0, 0
vstebrg %v0, 0, 0
vstebrh %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vster %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsterf %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsterg %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vsterh %v0, 0
vster %v0, 0, 0
vsterf %v0, 0
vsterg %v0, 0
vsterh %v0, 0
#CHECK: error: invalid operand
#CHECK: vstrl %v0, 0, -1
#CHECK: error: invalid operand
@@ -613,6 +883,29 @@
vstrlr %v0, %r0, 4096
vstrlr %v0, %r0, 0(%r0)
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrs %v0, %v0, %v0, %v0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrsb %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrsf %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrsh %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrszb %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrszf %v0, %v0, %v0, %v0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: vstrszh %v0, %v0, %v0, %v0
vstrs %v0, %v0, %v0, %v0, 0
vstrsb %v0, %v0, %v0, %v0
vstrsf %v0, %v0, %v0, %v0
vstrsh %v0, %v0, %v0, %v0
vstrszb %v0, %v0, %v0, %v0
vstrszf %v0, %v0, %v0, %v0
vstrszh %v0, %v0, %v0, %v0
#CHECK: error: invalid operand
#CHECK: vupkz %v0, 0, -1
#CHECK: error: invalid operand
@@ -630,6 +923,26 @@
vupkz %v0, 4096, 0
vupkz %v0, 0(%r0), 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wcefb %v0, %v0, 0, 0
wcefb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wcelfb %v0, %v0, 0, 0
wcelfb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wcfeb %v0, %v0, 0, 0
wcfeb %v0, %v0, 0, 0
#CHECK: error: instruction requires: vector-enhancements-2
#CHECK: wclfeb %v0, %v0, 0, 0
wclfeb %v0, %v0, 0, 0
#CHECK: error: invalid operand
#CHECK: wfisb %v0, %v0, 0, -1
#CHECK: error: invalid operand

File diff suppressed because it is too large Load Diff

View File

@@ -381,3 +381,21 @@ define <16 x i8> @test_vsldb(<16 x i8> %a, <16 x i8> %b, i32 %c) {
ret <16 x i8> %res
}
declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
; Negative verifier test: the third operand of @llvm.s390.vsld is declared
; ImmArg<2> (see IntrinsicsSystemZ.td), so passing the runtime value %c
; instead of a constant must be diagnosed by the IR verifier.
define <16 x i8> @test_vsld(<16 x i8> %a, <16 x i8> %b, i32 %c) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %c
; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
%res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
; Negative verifier test: mirrors test_vsld for @llvm.s390.vsrd, whose
; third operand is likewise ImmArg<2>; a non-constant %c must be rejected.
define <16 x i8> @test_vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %c
; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
%res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
ret <16 x i8> %res
}