forked from OSchip/llvm-project
[SystemZ] Add CodeGen support for v2f64
This adds ABI and CodeGen support for the v2f64 type, which is natively supported by z13 instructions. Based on a patch by Richard Sandiford. llvm-svn: 236522
This commit is contained in:
parent
ce4c109585
commit
cd808237b2
|
@ -44,7 +44,7 @@ def RetCC_SystemZ : CallingConv<[
|
|||
|
||||
// Similarly for vectors, with V24 being the ABI-compliant choice.
|
||||
CCIfSubtarget<"hasVector()",
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64],
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64],
|
||||
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
|
||||
|
||||
// ABI-compliant code returns long double by reference, but that conversion
|
||||
|
@ -76,13 +76,13 @@ def CC_SystemZ : CallingConv<[
|
|||
|
||||
// The first 8 named vector arguments are passed in V24-V31.
|
||||
CCIfSubtarget<"hasVector()",
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64],
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64],
|
||||
CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
|
||||
V25, V27, V29, V31]>>>>,
|
||||
|
||||
// Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
|
||||
CCIfSubtarget<"hasVector()",
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64],
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64],
|
||||
CCAssignToStack<16, 8>>>,
|
||||
|
||||
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
|
||||
|
|
|
@ -101,6 +101,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
|
|||
addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
|
||||
addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
|
||||
addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
|
||||
addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
|
||||
}
|
||||
|
||||
// Compute derived properties from the register classes
|
||||
|
@ -327,6 +328,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
|
|||
}
|
||||
}
|
||||
|
||||
if (Subtarget.hasVector()) {
|
||||
// There should be no need to check for float types other than v2f64
|
||||
// since <2 x f32> isn't a legal type.
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
|
||||
}
|
||||
|
||||
// Handle floating-point types.
|
||||
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
|
||||
I <= MVT::LAST_FP_VALUETYPE;
|
||||
|
@ -352,6 +362,33 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
|
|||
}
|
||||
}
|
||||
|
||||
// Handle floating-point vector types.
|
||||
if (Subtarget.hasVector()) {
|
||||
// Scalar-to-vector conversion is just a subreg.
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
|
||||
|
||||
// Some insertions and extractions can be done directly but others
|
||||
// need to go via integers.
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
|
||||
|
||||
// These operations have direct equivalents.
|
||||
setOperationAction(ISD::FADD, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
|
||||
}
|
||||
|
||||
// We have fused multiply-addition for f32 and f64 but not f128.
|
||||
setOperationAction(ISD::FMA, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FMA, MVT::f64, Legal);
|
||||
|
@ -818,6 +855,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
|
|||
case MVT::v8i16:
|
||||
case MVT::v4i32:
|
||||
case MVT::v2i64:
|
||||
case MVT::v2f64:
|
||||
RC = &SystemZ::VR128BitRegClass;
|
||||
break;
|
||||
}
|
||||
|
@ -1894,18 +1932,25 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
|
|||
return Result;
|
||||
}
|
||||
|
||||
// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
|
||||
// be done directly.
|
||||
static unsigned getVectorComparison(ISD::CondCode CC) {
|
||||
// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
|
||||
// be done directly. IsFP is true if CC is for a floating-point rather than
|
||||
// integer comparison.
|
||||
static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
|
||||
switch (CC) {
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETEQ:
|
||||
return SystemZISD::VICMPE;
|
||||
return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
|
||||
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETGE:
|
||||
return IsFP ? SystemZISD::VFCMPHE : 0;
|
||||
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETGT:
|
||||
return SystemZISD::VICMPH;
|
||||
return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
|
||||
|
||||
case ISD::SETUGT:
|
||||
return SystemZISD::VICMPHL;
|
||||
return IsFP ? 0 : SystemZISD::VICMPHL;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -1914,15 +1959,17 @@ static unsigned getVectorComparison(ISD::CondCode CC) {
|
|||
|
||||
// Return the SystemZISD vector comparison operation for CC or its inverse,
|
||||
// or 0 if neither can be done directly. Indicate in Invert whether the
|
||||
// result is for the inverse of CC.
|
||||
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool &Invert) {
|
||||
if (unsigned Opcode = getVectorComparison(CC)) {
|
||||
// result is for the inverse of CC. IsFP is true if CC is for a
|
||||
// floating-point rather than integer comparison.
|
||||
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
|
||||
bool &Invert) {
|
||||
if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
|
||||
Invert = false;
|
||||
return Opcode;
|
||||
}
|
||||
|
||||
CC = ISD::getSetCCInverse(CC, true);
|
||||
if (unsigned Opcode = getVectorComparison(CC)) {
|
||||
CC = ISD::getSetCCInverse(CC, !IsFP);
|
||||
if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
|
||||
Invert = true;
|
||||
return Opcode;
|
||||
}
|
||||
|
@ -1935,18 +1982,46 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool &Invert) {
|
|||
static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
|
||||
ISD::CondCode CC, SDValue CmpOp0,
|
||||
SDValue CmpOp1) {
|
||||
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
|
||||
bool Invert = false;
|
||||
SDValue Cmp;
|
||||
// It doesn't really matter whether we try the inversion or the swap first,
|
||||
// since there are no cases where both work.
|
||||
if (unsigned Opcode = getVectorComparisonOrInvert(CC, Invert))
|
||||
Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
|
||||
else {
|
||||
CC = ISD::getSetCCSwappedOperands(CC);
|
||||
if (unsigned Opcode = getVectorComparisonOrInvert(CC, Invert))
|
||||
Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0);
|
||||
else
|
||||
llvm_unreachable("Unhandled comparison");
|
||||
switch (CC) {
|
||||
// Handle tests for order using (or (ogt y x) (oge x y)).
|
||||
case ISD::SETUO:
|
||||
Invert = true;
|
||||
case ISD::SETO: {
|
||||
assert(IsFP && "Unexpected integer comparison");
|
||||
SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
|
||||
SDValue GE = DAG.getNode(SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
|
||||
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
|
||||
break;
|
||||
}
|
||||
|
||||
// Handle <> tests using (or (ogt y x) (ogt x y)).
|
||||
case ISD::SETUEQ:
|
||||
Invert = true;
|
||||
case ISD::SETONE: {
|
||||
assert(IsFP && "Unexpected integer comparison");
|
||||
SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
|
||||
SDValue GT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
|
||||
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
|
||||
break;
|
||||
}
|
||||
|
||||
// Otherwise a single comparison is enough. It doesn't really
|
||||
// matter whether we try the inversion or the swap first, since
|
||||
// there are no cases where both work.
|
||||
default:
|
||||
if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
|
||||
Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
|
||||
else {
|
||||
CC = ISD::getSetCCSwappedOperands(CC);
|
||||
if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
|
||||
Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0);
|
||||
else
|
||||
llvm_unreachable("Unhandled comparison");
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (Invert) {
|
||||
SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
|
||||
|
@ -3326,6 +3401,46 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
|
|||
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
|
||||
}
|
||||
|
||||
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
|
||||
static bool isScalarToVector(SDValue Op) {
|
||||
for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
|
||||
if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return a vector of type VT that contains Value in the first element.
|
||||
// The other elements don't matter.
|
||||
static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
|
||||
SDValue Value) {
|
||||
// If we have a constant, replicate it to all elements and let the
|
||||
// BUILD_VECTOR lowering take care of it.
|
||||
if (Value.getOpcode() == ISD::Constant ||
|
||||
Value.getOpcode() == ISD::ConstantFP) {
|
||||
SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
|
||||
}
|
||||
if (Value.getOpcode() == ISD::UNDEF)
|
||||
return DAG.getUNDEF(VT);
|
||||
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
|
||||
}
|
||||
|
||||
// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
|
||||
// element 1. Used for cases in which replication is cheap.
|
||||
static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
|
||||
SDValue Op0, SDValue Op1) {
|
||||
if (Op0.getOpcode() == ISD::UNDEF) {
|
||||
if (Op1.getOpcode() == ISD::UNDEF)
|
||||
return DAG.getUNDEF(VT);
|
||||
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
|
||||
}
|
||||
if (Op1.getOpcode() == ISD::UNDEF)
|
||||
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
|
||||
return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
|
||||
buildScalarToVector(DAG, DL, VT, Op0),
|
||||
buildScalarToVector(DAG, DL, VT, Op1));
|
||||
}
|
||||
|
||||
// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
|
||||
// vector for them.
|
||||
static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
|
||||
|
@ -3502,6 +3617,10 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
|
|||
if (VT == MVT::v2i64)
|
||||
return joinDwords(DAG, DL, Elems[0], Elems[1]);
|
||||
|
||||
// Use a 64-bit merge high to combine two doubles.
|
||||
if (VT == MVT::v2f64)
|
||||
return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
|
||||
|
||||
// Collect the constant terms.
|
||||
SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
|
||||
SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
|
||||
|
@ -3614,6 +3733,10 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
|
|||
if (Res.getNode())
|
||||
return Res;
|
||||
|
||||
// Detect SCALAR_TO_VECTOR conversions.
|
||||
if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
|
||||
return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
|
||||
|
||||
// Otherwise use buildVector to build the vector up from GPRs.
|
||||
unsigned NumElements = Op.getNumOperands();
|
||||
SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
|
||||
|
@ -3664,6 +3787,62 @@ SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
|
|||
Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
|
||||
}
|
||||
|
||||
// Custom lowering for INSERT_VECTOR_ELT of floating-point values.
//
// Op's operands are the vector being updated (0), the scalar being
// inserted (1) and the element index (2).  The node is returned unchanged
// when it can be matched directly; otherwise the insertion is redone on
// the equivalent integer vector type and the result is bitcast back.
SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Handle insertions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  EVT VT = Op.getValueType();

  // Insertions into constant indices can be done using VPDI.  However,
  // if the inserted value is a bitcast or a constant then it's better
  // to use GPRs, as below.
  if (Op1.getOpcode() != ISD::BITCAST &&
      Op1.getOpcode() != ISD::ConstantFP &&
      Op2.getOpcode() == ISD::Constant) {
    // The opcode check above guarantees the node type, so use cast<>
    // (which asserts) rather than dereferencing an unchecked dyn_cast<>.
    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
    unsigned Mask = VT.getVectorNumElements() - 1;
    // Only in-range indices are left for direct selection.
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
|
||||
|
||||
// Custom lowering for EXTRACT_VECTOR_ELT of floating-point values.
// Operand 0 of Op is the source vector and operand 1 is the element index.
SDValue
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  EVT EltVT = Op.getValueType();
  EVT VecVT = Vec.getValueType();
  const unsigned NumElts = VecVT.getVectorNumElements();

  // An extraction at an in-range constant index can be done directly,
  // so the node is returned unchanged.
  auto *ConstIdx = dyn_cast<ConstantSDNode>(Idx);
  if (ConstIdx && ConstIdx->getZExtValue() <= NumElts - 1)
    return Op;

  // Everything else is extracted from the equivalent integer vector via
  // a GPR, and the integer result is bitcast back to the element type.
  MVT IntEltVT = MVT::getIntegerVT(EltVT.getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntEltVT, NumElts);
  SDValue IntVec = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Vec);
  SDValue IntElt =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntEltVT, IntVec, Idx);
  return DAG.getNode(ISD::BITCAST, DL, EltVT, IntElt);
}
|
||||
|
||||
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
|
||||
unsigned ByScalar) const {
|
||||
// Look for cases where a vector shift can use the *_BY_SCALAR form.
|
||||
|
@ -3808,6 +3987,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
|
|||
return lowerVECTOR_SHUFFLE(Op, DAG);
|
||||
case ISD::SCALAR_TO_VECTOR:
|
||||
return lowerSCALAR_TO_VECTOR(Op, DAG);
|
||||
case ISD::INSERT_VECTOR_ELT:
|
||||
return lowerINSERT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::SHL:
|
||||
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
|
||||
case ISD::SRL:
|
||||
|
@ -3879,6 +4062,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
OPCODE(VICMPE);
|
||||
OPCODE(VICMPH);
|
||||
OPCODE(VICMPHL);
|
||||
OPCODE(VFCMPE);
|
||||
OPCODE(VFCMPH);
|
||||
OPCODE(VFCMPHE);
|
||||
OPCODE(ATOMIC_SWAPW);
|
||||
OPCODE(ATOMIC_LOADW_ADD);
|
||||
OPCODE(ATOMIC_LOADW_SUB);
|
||||
|
|
|
@ -219,6 +219,13 @@ enum {
|
|||
VICMPH,
|
||||
VICMPHL,
|
||||
|
||||
// Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
|
||||
// vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
|
||||
// greater than" and VFCMPHE for "ordered and greater than or equal to".
|
||||
VFCMPE,
|
||||
VFCMPH,
|
||||
VFCMPHE,
|
||||
|
||||
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
|
||||
// ATOMIC_LOAD_<op>.
|
||||
//
|
||||
|
@ -400,6 +407,8 @@ private:
|
|||
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
|
||||
|
||||
SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
|
||||
|
|
|
@ -118,18 +118,24 @@ let Predicates = [FeatureVector] in {
|
|||
def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
|
||||
def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
|
||||
def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
|
||||
def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
|
||||
(VLREPG bdxaddr12only:$addr)>;
|
||||
|
||||
// Load logical element and zero.
|
||||
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
|
||||
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
|
||||
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
|
||||
def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
|
||||
def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
|
||||
(VLLEZG bdxaddr12only:$addr)>;
|
||||
|
||||
// Load element.
|
||||
def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
|
||||
def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
|
||||
def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
|
||||
def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
|
||||
def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
|
||||
(VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
|
||||
|
||||
// Gather element.
|
||||
def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
|
||||
|
@ -152,6 +158,7 @@ defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
|
|||
defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
|
||||
defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
|
||||
defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
|
||||
defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Stores
|
||||
|
@ -172,6 +179,9 @@ let Predicates = [FeatureVector] in {
|
|||
def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
|
||||
def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
|
||||
def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
|
||||
def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
|
||||
imm32zx1:$index),
|
||||
(VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
|
||||
|
||||
// Scatter element.
|
||||
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
|
||||
|
@ -188,12 +198,14 @@ let Predicates = [FeatureVector] in {
|
|||
def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
|
||||
def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
|
||||
def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
|
||||
def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
|
||||
|
||||
// Merge low.
|
||||
def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
|
||||
def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
|
||||
def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
|
||||
def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
|
||||
def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>;
|
||||
|
||||
// Permute.
|
||||
def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
|
||||
|
@ -206,6 +218,8 @@ let Predicates = [FeatureVector] in {
|
|||
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
|
||||
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
|
||||
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
|
||||
def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
|
||||
(VREPG VR128:$vec, imm32zx16:$index)>;
|
||||
|
||||
// Select.
|
||||
def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
|
||||
|
@ -287,6 +301,7 @@ defm : GenericVectorOps<v16i8, v16i8>;
|
|||
defm : GenericVectorOps<v8i16, v8i16>;
|
||||
defm : GenericVectorOps<v4i32, v4i32>;
|
||||
defm : GenericVectorOps<v2i64, v2i64>;
|
||||
defm : GenericVectorOps<v2f64, v2i64>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Integer arithmetic
|
||||
|
@ -734,34 +749,52 @@ let Predicates = [FeatureVector] in {
|
|||
// Floating-point arithmetic
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// See comments in SystemZInstrFP.td for the suppression flags and
|
||||
// rounding modes.
|
||||
multiclass VectorRounding<Instruction insn, TypedReg tr> {
|
||||
def : FPConversion<insn, frint, tr, tr, 0, 0>;
|
||||
def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
|
||||
def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
|
||||
def : FPConversion<insn, fceil, tr, tr, 4, 6>;
|
||||
def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
|
||||
def : FPConversion<insn, frnd, tr, tr, 4, 1>;
|
||||
}
|
||||
|
||||
let Predicates = [FeatureVector] in {
|
||||
// Add.
|
||||
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, null_frag, v128db, v128db, 3, 0>;
|
||||
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
|
||||
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>;
|
||||
|
||||
// Convert from fixed 64-bit.
|
||||
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
|
||||
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
|
||||
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
|
||||
|
||||
// Convert from logical 64-bit.
|
||||
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
|
||||
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
|
||||
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
|
||||
|
||||
// Convert to fixed 64-bit.
|
||||
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
|
||||
def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
|
||||
// Rounding mode should agree with SystemZInstrFP.td.
|
||||
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
|
||||
|
||||
// Convert to logical 64-bit.
|
||||
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
|
||||
def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
|
||||
// Rounding mode should agree with SystemZInstrFP.td.
|
||||
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
|
||||
|
||||
// Divide.
|
||||
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, null_frag, v128db, v128db, 3, 0>;
|
||||
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
|
||||
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>;
|
||||
|
||||
// Load FP integer.
|
||||
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
|
||||
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
|
||||
defm : VectorRounding<VFIDB, v128db>;
|
||||
|
||||
// Load lengthened.
|
||||
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, null_frag, v128db, v128eb, 2, 0>;
|
||||
|
@ -772,35 +805,35 @@ let Predicates = [FeatureVector] in {
|
|||
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
|
||||
|
||||
// Multiply.
|
||||
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, null_frag, v128db, v128db, 3, 0>;
|
||||
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
|
||||
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>;
|
||||
|
||||
// Multiply and add.
|
||||
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, null_frag, v128db, v128db, 0, 3>;
|
||||
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
|
||||
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>;
|
||||
|
||||
// Multiply and subtract.
|
||||
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, null_frag, v128db, v128db, 0, 3>;
|
||||
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
|
||||
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>;
|
||||
|
||||
// Load complement.
|
||||
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 0>;
|
||||
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
|
||||
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>;
|
||||
|
||||
// Load negative.
|
||||
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 1>;
|
||||
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
|
||||
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>;
|
||||
|
||||
// Load positive.
|
||||
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 2>;
|
||||
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
|
||||
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>;
|
||||
|
||||
// Square root.
|
||||
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, null_frag, v128db, v128db, 3, 0>;
|
||||
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
|
||||
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>;
|
||||
|
||||
// Subtract.
|
||||
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, null_frag, v128db, v128db, 3, 0>;
|
||||
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
|
||||
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>;
|
||||
|
||||
// Test data class immediate.
|
||||
|
@ -824,19 +857,19 @@ let Predicates = [FeatureVector] in {
|
|||
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
|
||||
|
||||
// Compare equal.
|
||||
defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, null_frag, null_frag,
|
||||
defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, null_frag,
|
||||
v128g, v128db, 3, 0>;
|
||||
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
|
||||
v64g, v64db, 3, 8>;
|
||||
|
||||
// Compare high.
|
||||
defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, null_frag, null_frag,
|
||||
defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, null_frag,
|
||||
v128g, v128db, 3, 0>;
|
||||
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
|
||||
v64g, v64db, 3, 8>;
|
||||
|
||||
// Compare high or equal.
|
||||
defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, null_frag, null_frag,
|
||||
defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, null_frag,
|
||||
v128g, v128db, 3, 0>;
|
||||
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
|
||||
v64g, v64db, 3, 8>;
|
||||
|
@ -849,18 +882,27 @@ let Predicates = [FeatureVector] in {
|
|||
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
|
||||
|
||||
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
|
||||
|
||||
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
|
||||
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
|
||||
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Replicating scalars
|
||||
|
@ -880,6 +922,46 @@ def : VectorReplicateScalar<v4i32, VREPF, 1>;
|
|||
def : Pat<(v2i64 (z_replicate GR64:$scalar)),
|
||||
(VLVGP GR64:$scalar, GR64:$scalar)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating-point insertion and extraction
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Floating-point values are stored in element 0 of the corresponding
|
||||
// vector register. Scalar to vector conversion is just a subreg and
|
||||
// scalar replication can just replicate element 0 of the vector register.
|
||||
multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
|
||||
SubRegIndex subreg> {
|
||||
def : Pat<(vt (scalar_to_vector cls:$scalar)),
|
||||
(INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>;
|
||||
def : Pat<(vt (z_replicate cls:$scalar)),
|
||||
(vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
|
||||
subreg), 0)>;
|
||||
}
|
||||
defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
|
||||
|
||||
// Match v2f64 insertions. The AddedComplexity counters the 3 added by
|
||||
// TableGen for the base register operand in VLVG-based integer insertions
|
||||
// and ensures that this version is strictly better.
|
||||
let AddedComplexity = 4 in {
|
||||
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
|
||||
(VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
|
||||
subreg_r64), VR128:$vec, 1)>;
|
||||
def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
|
||||
(VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
|
||||
subreg_r64), 0)>;
|
||||
}
|
||||
|
||||
// We extract f64 element X by replicating (for elements other than 0)
|
||||
// and then taking a high subreg. The AddedComplexity counters the 3
|
||||
// added by TableGen for the base register operand in VLGV-based integer
|
||||
// extractions and ensures that this version is strictly better.
|
||||
let AddedComplexity = 4 in {
|
||||
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
|
||||
(EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
|
||||
def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
|
||||
(EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// String instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -200,6 +200,9 @@ def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
|
|||
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
|
||||
def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
|
||||
def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
|
||||
def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
|
||||
def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
|
||||
def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
|
||||
|
||||
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
|
||||
: SDNode<"SystemZISD::"##name, profile,
|
||||
|
@ -468,6 +471,10 @@ def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
|
|||
def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(add (mul node:$src1, node:$src2), node:$src3)>;
|
||||
|
||||
// Fused multiply-subtract, using the natural operand order.
|
||||
def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(fma node:$src1, node:$src2, (fneg node:$src3))>;
|
||||
|
||||
// Fused multiply-add and multiply-subtract, but with the order of the
|
||||
// operands matching SystemZ's MA and MS instructions.
|
||||
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
|
@ -501,6 +508,7 @@ def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
|
|||
def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
|
||||
def z_replicate_loadi32 : z_replicate_load<i32, load>;
|
||||
def z_replicate_loadi64 : z_replicate_load<i64, load>;
|
||||
def z_replicate_loadf64 : z_replicate_load<f64, load>;
|
||||
|
||||
// Load a scalar and insert it into a single element of a vector.
|
||||
class z_vle<ValueType scalartype, SDPatternOperator load>
|
||||
|
@ -511,6 +519,7 @@ def z_vlei8 : z_vle<i32, anyextloadi8>;
|
|||
def z_vlei16 : z_vle<i32, anyextloadi16>;
|
||||
def z_vlei32 : z_vle<i32, load>;
|
||||
def z_vlei64 : z_vle<i64, load>;
|
||||
def z_vlef64 : z_vle<f64, load>;
|
||||
|
||||
// Load a scalar and insert it into the low element of the high i64 of a
|
||||
// zeroed vector.
|
||||
|
@ -523,6 +532,10 @@ def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
|
|||
def z_vllezi32 : z_vllez<i32, load, 1>;
|
||||
def z_vllezi64 : PatFrag<(ops node:$addr),
|
||||
(z_join_dwords (i64 (load node:$addr)), (i64 0))>;
|
||||
// f64 form: the loaded scalar is turned into a vector with scalar_to_vector
// and then combined with an all-zero vector via z_merge_high.
def z_vllezf64 : PatFrag<(ops node:$addr),
|
||||
(z_merge_high
|
||||
(scalar_to_vector (f64 (load node:$addr))),
|
||||
(z_vzero))>;
|
||||
|
||||
// Store one element of a vector.
|
||||
class z_vste<ValueType scalartype, SDPatternOperator store>
|
||||
|
@ -533,6 +546,7 @@ def z_vstei8 : z_vste<i32, truncstorei8>;
|
|||
def z_vstei16 : z_vste<i32, truncstorei16>;
|
||||
def z_vstei32 : z_vste<i32, store>;
|
||||
def z_vstei64 : z_vste<i64, store>;
|
||||
def z_vstef64 : z_vste<f64, store>;
|
||||
|
||||
// Arithmetic negation on vectors.
|
||||
def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
|
||||
|
|
|
@ -153,3 +153,17 @@ multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
|
|||
// The sign of the zero makes no difference.
|
||||
def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
|
||||
}
|
||||
|
||||
// Use INSN for performing binary operation OPERATOR of type VT
|
||||
// on registers of class CLS.
|
||||
class BinaryRRWithType<Instruction insn, RegisterOperand cls,
|
||||
SDPatternOperator operator, ValueType vt>
|
||||
: Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>;
|
||||
|
||||
// Use INSN to perform conversion operation OPERATOR, with the input being
|
||||
// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions
|
||||
// and 0 to allow them. MODE is the rounding mode to use.
|
||||
class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
|
||||
TypedReg tr2, bits<3> suppress, bits<4> mode>
|
||||
: Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
|
||||
(insn tr2.op:$vec, suppress, mode)>;
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
; Test v2f64 absolute.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
|
||||
|
||||
; Test a plain absolute.
|
||||
define <2 x double> @f1(<2 x double> %val) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vflpdb %v24, %v24
|
||||
; CHECK: br %r14
|
||||
%ret = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a negative absolute.
|
||||
define <2 x double> @f2(<2 x double> %val) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vflndb %v24, %v24
|
||||
; CHECK: br %r14
|
||||
%abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
|
||||
%ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
|
||||
ret <2 x double> %ret
|
||||
}
|
|
@ -37,3 +37,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
|
|||
%ret = add <2 x i64> %val1, %val2
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 addition.
|
||||
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vfadb %v24, %v26, %v28
|
||||
; CHECK: br %r14
|
||||
%ret = fadd <2 x double> %val1, %val2
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -0,0 +1,337 @@
|
|||
; Test v2f64 comparisons.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
; Test oeq.
|
||||
define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vfcedb %v24, %v26, %v28
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp oeq <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test one.
|
||||
define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
|
||||
; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
|
||||
; CHECK: vo %v24, [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp one <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ogt.
|
||||
define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vfchdb %v24, %v26, %v28
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ogt <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test oge.
|
||||
define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vfchedb %v24, %v26, %v28
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp oge <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ole.
|
||||
define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vfchedb %v24, %v28, %v26
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ole <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test olt.
|
||||
define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vfchdb %v24, %v28, %v26
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp olt <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ueq.
|
||||
define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
|
||||
; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
|
||||
; CHECK: vno %v24, [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ueq <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test une.
|
||||
define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28
|
||||
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp une <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ugt.
|
||||
define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26
|
||||
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ugt <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test uge.
|
||||
define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26
|
||||
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp uge <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ule.
|
||||
define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28
|
||||
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ule <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ult.
|
||||
define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28
|
||||
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ult <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test ord.
|
||||
define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
|
||||
; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
|
||||
; CHECK: vo %v24, [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ord <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test uno.
|
||||
define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
|
||||
; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
|
||||
; CHECK: vno %v24, [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp uno <2 x double> %val1, %val2
|
||||
%ret = sext <2 x i1> %cmp to <2 x i64>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test oeq selects.
|
||||
define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp oeq <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test one selects.
|
||||
define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
|
||||
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp one <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ogt selects.
|
||||
define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ogt <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test oge selects.
|
||||
define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp oge <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ole selects.
|
||||
define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f19:
|
||||
; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ole <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test olt selects.
|
||||
define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f20:
|
||||
; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp olt <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ueq selects.
|
||||
define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f21:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
|
||||
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ueq <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test une selects.
|
||||
define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f22:
|
||||
; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp une <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ugt selects.
|
||||
define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f23:
|
||||
; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ugt <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test uge selects.
|
||||
define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f24:
|
||||
; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp uge <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ule selects.
|
||||
define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f25:
|
||||
; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ule <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ult selects.
|
||||
define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f26:
|
||||
; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ult <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test ord selects.
|
||||
define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f27:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
|
||||
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp ord <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test uno selects.
|
||||
define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
|
||||
<2 x double> %val3, <2 x double> %val4) {
|
||||
; CHECK-LABEL: f28:
|
||||
; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
|
||||
; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
|
||||
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||
; CHECK-NEXT: br %r14
|
||||
%cmp = fcmp uno <2 x double> %val1, %val2
|
||||
%ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
|
||||
ret <2 x double> %ret
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
; Test vector byte masks, v2f64 version.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
; Test an all-zeros vector.
|
||||
define <2 x double> @f1() {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vgbm %v24, 0
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> zeroinitializer
|
||||
}
|
||||
|
||||
; Test an all-ones vector.
|
||||
define <2 x double> @f2() {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vgbm %v24, 65535
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
|
||||
}
|
||||
|
||||
; Test a mixed vector (mask 0x8c76).
|
||||
define <2 x double> @f3() {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vgbm %v24, 35958
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00>
|
||||
}
|
||||
|
||||
; Test that undefs are treated as zero (mask 0x8c00).
|
||||
define <2 x double> @f4() {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vgbm %v24, 35840
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xff000000ffff0000, double undef>
|
||||
}
|
||||
|
||||
; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
|
||||
define <2 x double> @f5() {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK-NOT: vgbm
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00>
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
; Test vector replicates, v2f64 version.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
; Test a byte-granularity replicate with the lowest useful value.
|
||||
define <2 x double> @f1() {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vrepib %v24, 1
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0101010101010101, double 0x0101010101010101>
|
||||
}
|
||||
|
||||
; Test a byte-granularity replicate with an arbitrary value.
|
||||
define <2 x double> @f2() {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vrepib %v24, -55
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xc9c9c9c9c9c9c9c9, double 0xc9c9c9c9c9c9c9c9>
|
||||
}
|
||||
|
||||
; Test a byte-granularity replicate with the highest useful value.
|
||||
define <2 x double> @f3() {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vrepib %v24, -2
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfefefefefefefefe, double 0xfefefefefefefefe>
|
||||
}
|
||||
|
||||
; Test a halfword-granularity replicate with the lowest useful value.
|
||||
define <2 x double> @f4() {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vrepih %v24, 1
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0001000100010001, double 0x0001000100010001>
|
||||
}
|
||||
|
||||
; Test a halfword-granularity replicate with an arbitrary value.
|
||||
define <2 x double> @f5() {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vrepih %v24, 25650
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x6432643264326432, double 0x6432643264326432>
|
||||
}
|
||||
|
||||
; Test a halfword-granularity replicate with the highest useful value.
|
||||
define <2 x double> @f6() {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vrepih %v24, -2
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfffefffefffefffe, double 0xfffefffefffefffe>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with the lowest useful positive value.
|
||||
define <2 x double> @f7() {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: vrepif %v24, 1
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000000100000001, double 0x0000000100000001>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with the highest in-range value.
|
||||
define <2 x double> @f8() {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vrepif %v24, 32767
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x00007fff00007fff, double 0x00007fff00007fff>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with the next highest value.
|
||||
; This cannot use VREPIF.
|
||||
define <2 x double> @f9() {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK-NOT: vrepif
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000800000008000, double 0x0000800000008000>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with the lowest in-range value.
|
||||
define <2 x double> @f10() {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: vrepif %v24, -32768
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xffff8000ffff8000, double 0xffff8000ffff8000>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with the next lowest value.
|
||||
; This cannot use VREPIF.
|
||||
define <2 x double> @f11() {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK-NOT: vrepif
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xffff7fffffff7fff, double 0xffff7fffffff7fff>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with the highest useful negative value.
|
||||
define <2 x double> @f12() {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: vrepif %v24, -2
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfffffffefffffffe, double 0xfffffffefffffffe>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the lowest useful positive
|
||||
; value.
|
||||
define <2 x double> @f13() {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: vrepig %v24, 1
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000000000000001, double 0x0000000000000001>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the highest in-range value.
|
||||
define <2 x double> @f14() {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: vrepig %v24, 32767
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000000000007fff, double 0x0000000000007fff>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the next highest value.
|
||||
; This cannot use VREPIG.
|
||||
define <2 x double> @f15() {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK-NOT: vrepig
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000000000008000, double 0x0000000000008000>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the lowest in-range value.
|
||||
define <2 x double> @f16() {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vrepig %v24, -32768
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xffffffffffff8000, double 0xffffffffffff8000>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the next lowest value.
|
||||
; This cannot use VREPIG.
|
||||
define <2 x double> @f17() {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK-NOT: vrepig
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xffffffffffff7fff, double 0xffffffffffff7fff>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the highest useful negative
|
||||
; value.
|
||||
define <2 x double> @f18() {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: vrepig %v24, -2
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfffffffffffffffe, double 0xfffffffffffffffe>
|
||||
}
|
||||
|
||||
; Repeat f14 with undefs optimistically treated as 32767.
|
||||
define <2 x double> @f19() {
|
||||
; CHECK-LABEL: f19:
|
||||
; CHECK: vrepig %v24, 32767
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double undef, double 0x0000000000007fff>
|
||||
}
|
||||
|
||||
; Repeat f18 with undefs optimistically treated as -2.
|
||||
define <2 x double> @f20() {
|
||||
; CHECK-LABEL: f20:
|
||||
; CHECK: vrepig %v24, -2
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double undef, double 0xfffffffffffffffe>
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
; Test vector replicates that use VECTOR GENERATE MASK, v2f64 version.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
; Test a word-granularity replicate with the lowest value that cannot use
|
||||
; VREPIF.
|
||||
define <2 x double> @f1() {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vgmf %v24, 16, 16
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000800000008000, double 0x0000800000008000>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate that has the lower 17 bits set.
|
||||
define <2 x double> @f2() {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vgmf %v24, 15, 31
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0001ffff0001ffff, double 0x0001ffff0001ffff>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate that has the upper 15 bits set.
|
||||
define <2 x double> @f3() {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vgmf %v24, 0, 14
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfffe0000fffe0000, double 0xfffe0000fffe0000>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate that has middle bits set.
|
||||
define <2 x double> @f4() {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vgmf %v24, 2, 11
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x3ff000003ff00000, double 0x3ff000003ff00000>
|
||||
}
|
||||
|
||||
; Test a word-granularity replicate with a wrap-around mask.
|
||||
define <2 x double> @f5() {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vgmf %v24, 17, 15
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xffff7fffffff7fff, double 0xffff7fffffff7fff>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with the lowest value that cannot
|
||||
; use VREPIG.
|
||||
define <2 x double> @f6() {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vgmg %v24, 48, 48
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x0000000000008000, double 0x0000000000008000>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate that has the lower 22 bits set
|
||||
; (0x3fffff in each doubleword, i.e. the 42..63 range checked below).
define <2 x double> @f7() {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: vgmg %v24, 42, 63
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x00000000003fffff, double 0x00000000003fffff>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate that has the upper 45 bits set.
|
||||
define <2 x double> @f8() {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: vgmg %v24, 0, 44
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0xfffffffffff80000, double 0xfffffffffff80000>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate that has middle bits set.
|
||||
define <2 x double> @f9() {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vgmg %v24, 2, 11
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x3ff0000000000000, double 0x3ff0000000000000>
|
||||
}
|
||||
|
||||
; Test a doubleword-granularity replicate with a wrap-around mask.
|
||||
define <2 x double> @f10() {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: vgmg %v24, 10, 0
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> <double 0x803fffffffffffff, double 0x803fffffffffffff>
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
; Test conversions between integer and float elements.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
; Test conversion of f64s to signed i64s.
|
||||
define <2 x i64> @f1(<2 x double> %doubles) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vcgdb %v24, %v24, 0, 5
|
||||
; CHECK: br %r14
|
||||
%dwords = fptosi <2 x double> %doubles to <2 x i64>
|
||||
ret <2 x i64> %dwords
|
||||
}
|
||||
|
||||
; Test conversion of f64s to unsigned i64s.
|
||||
define <2 x i64> @f2(<2 x double> %doubles) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vclgdb %v24, %v24, 0, 5
|
||||
; CHECK: br %r14
|
||||
%dwords = fptoui <2 x double> %doubles to <2 x i64>
|
||||
ret <2 x i64> %dwords
|
||||
}
|
||||
|
||||
; Test conversion of signed i64s to f64s.
|
||||
define <2 x double> @f3(<2 x i64> %dwords) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vcdgb %v24, %v24, 0, 0
|
||||
; CHECK: br %r14
|
||||
%doubles = sitofp <2 x i64> %dwords to <2 x double>
|
||||
ret <2 x double> %doubles
|
||||
}
|
||||
|
||||
; Test conversion of unsigned i64s to f64s.
|
||||
define <2 x double> @f4(<2 x i64> %dwords) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vcdlgb %v24, %v24, 0, 0
|
||||
; CHECK: br %r14
|
||||
%doubles = uitofp <2 x i64> %dwords to <2 x double>
|
||||
ret <2 x double> %doubles
|
||||
}
|
||||
|
||||
; Test conversion of f64s to signed i32s, which must compile.
|
||||
define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) {
|
||||
%words = fptosi <2 x double> %doubles to <2 x i32>
|
||||
store <2 x i32> %words, <2 x i32> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test conversion of f64s to unsigned i32s, which must compile.
|
||||
define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) {
|
||||
%words = fptoui <2 x double> %doubles to <2 x i32>
|
||||
store <2 x i32> %words, <2 x i32> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test conversion of signed i32s to f64s, which must compile.
|
||||
define <2 x double> @f7(<2 x i32> *%ptr) {
|
||||
%words = load <2 x i32>, <2 x i32> *%ptr
|
||||
%doubles = sitofp <2 x i32> %words to <2 x double>
|
||||
ret <2 x double> %doubles
|
||||
}
|
||||
|
||||
; Test conversion of unsigned i32s to f64s, which must compile.
|
||||
define <2 x double> @f8(<2 x i32> *%ptr) {
|
||||
%words = load <2 x i32>, <2 x i32> *%ptr
|
||||
%doubles = uitofp <2 x i32> %words to <2 x double>
|
||||
ret <2 x double> %doubles
|
||||
}
|
||||
|
||||
; Test conversion of f32s to signed i64s, which must compile.
|
||||
define <2 x i64> @f9(<2 x float> *%ptr) {
|
||||
%floats = load <2 x float>, <2 x float> *%ptr
|
||||
%dwords = fptosi <2 x float> %floats to <2 x i64>
|
||||
ret <2 x i64> %dwords
|
||||
}
|
||||
|
||||
; Test conversion of f32s to unsigned i64s, which must compile.
|
||||
define <2 x i64> @f10(<2 x float> *%ptr) {
|
||||
%floats = load <2 x float>, <2 x float> *%ptr
|
||||
%dwords = fptoui <2 x float> %floats to <2 x i64>
|
||||
ret <2 x i64> %dwords
|
||||
}
|
||||
|
||||
; Test conversion of signed i64s to f32s, which must compile.
|
||||
define void @f11(<2 x i64> %dwords, <2 x float> *%ptr) {
|
||||
%floats = sitofp <2 x i64> %dwords to <2 x float>
|
||||
store <2 x float> %floats, <2 x float> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test conversion of unsigned i64s to f32s, which must compile.
|
||||
define void @f12(<2 x i64> %dwords, <2 x float> *%ptr) {
|
||||
%floats = uitofp <2 x i64> %dwords to <2 x float>
|
||||
store <2 x float> %floats, <2 x float> *%ptr
|
||||
ret void
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
; Test vector division. There is no native support for this, so it's really
|
||||
; a test of the operation legalization code.
|
||||
; Test vector division. There is no native integer support for this,
|
||||
; so the integer cases are really a test of the operation legalization code.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
|
@ -60,3 +60,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
|
|||
%ret = sdiv <2 x i64> %val1, %val2
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 division.
|
||||
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vfddb %v24, %v26, %v28
|
||||
; CHECK: br %r14
|
||||
%ret = fdiv <2 x double> %val1, %val2
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
; Test v2f64 logarithm.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.log.v2f64(<2 x double>)
|
||||
|
||||
; Test a v2f64 log.  The checks below expect two calls to the scalar log
; routine, followed by a VMRHG that produces the vector result in %v24.
define <2 x double> @f1(<2 x double> %val) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: brasl %r14, log@PLT
|
||||
; CHECK: brasl %r14, log@PLT
|
||||
; CHECK: vmrhg %v24,
|
||||
; CHECK: br %r14
|
||||
%ret = call <2 x double> @llvm.log.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %ret
|
||||
}
|
|
@ -33,3 +33,11 @@ define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
|
|||
; CHECK: br %r14
|
||||
ret <2 x i64> %val2
|
||||
}
|
||||
|
||||
; Test v2f64 moves.
|
||||
define <2 x double> @f6(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vlr %v24, %v26
|
||||
; CHECK: br %r14
|
||||
ret <2 x double> %val2
|
||||
}
|
||||
|
|
|
@ -38,6 +38,15 @@ define <2 x i64> @f4(<2 x i64> *%ptr) {
|
|||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 loads.
|
||||
define <2 x double> @f6(<2 x double> *%ptr) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vl %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ret = load <2 x double>, <2 x double> *%ptr
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test the highest aligned in-range offset.
|
||||
define <16 x i8> @f7(<16 x i8> *%base) {
|
||||
; CHECK-LABEL: f7:
|
||||
|
|
|
@ -38,6 +38,15 @@ define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test v2f64 stores.
|
||||
define void @f6(<2 x double> %val, <2 x double> *%ptr) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vst %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
store <2 x double> %val, <2 x double> *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test the highest aligned in-range offset.
|
||||
define void @f7(<16 x i8> %val, <16 x i8> *%base) {
|
||||
; CHECK-LABEL: f7:
|
||||
|
|
|
@ -110,6 +110,34 @@ define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) {
|
|||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into the first element.
|
||||
define <2 x double> @f16(<2 x double> %val, double %element) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vpdi %v24, %v0, %v24, 1
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into the last element.
|
||||
define <2 x double> @f17(<2 x double> %val, double %element) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: vpdi %v24, %v24, %v0, 0
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into a variable element.
|
||||
define <2 x double> @f18(<2 x double> %val, double %element, i32 %index) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: lgdr [[REG:%r[0-5]]], %f0
|
||||
; CHECK: vlvgg %v24, [[REG]], 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 %index
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v16i8 insertion into a variable element plus one.
|
||||
define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) {
|
||||
; CHECK-LABEL: f19:
|
||||
|
|
|
@ -150,6 +150,41 @@ define i64 @f16(<2 x i64> %val, i32 %index) {
|
|||
ret i64 %ret
|
||||
}
|
||||
|
||||
; Test v2f64 extraction of the first element.
|
||||
define double @f23(<2 x double> %val) {
|
||||
; CHECK-LABEL: f23:
|
||||
; CHECK: vlr %v0, %v24
|
||||
; CHECK: br %r14
|
||||
%ret = extractelement <2 x double> %val, i32 0
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
; Test v2f64 extraction of the last element.
|
||||
define double @f24(<2 x double> %val) {
|
||||
; CHECK-LABEL: f24:
|
||||
; CHECK: vrepg %v0, %v24, 1
|
||||
; CHECK: br %r14
|
||||
%ret = extractelement <2 x double> %val, i32 1
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
; Test v2f64 extractions of an absurd element number. This must compile
|
||||
; but we don't care what it does.
|
||||
define double @f25(<2 x double> %val) {
|
||||
%ret = extractelement <2 x double> %val, i32 100000
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
; Test v2f64 extraction of a variable element.
|
||||
define double @f26(<2 x double> %val, i32 %index) {
|
||||
; CHECK-LABEL: f26:
|
||||
; CHECK: vlgvg [[REG:%r[0-5]]], %v24, 0(%r2)
|
||||
; CHECK: ldgr %f0, [[REG]]
|
||||
; CHECK: br %r14
|
||||
%ret = extractelement <2 x double> %val, i32 %index
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
; Test v16i8 extraction of a variable element with an offset.
|
||||
define i8 @f27(<16 x i8> %val, i32 %index) {
|
||||
; CHECK-LABEL: f27:
|
||||
|
|
|
@ -37,3 +37,12 @@ define <2 x i64> @f4(i64 %val) {
|
|||
%ret = insertelement <2 x i64> undef, i64 %val, i32 0
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64, which is just a move.
|
||||
define <2 x double> @f6(double %val) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vlr %v24, %v0
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> undef, double %val, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -214,6 +214,59 @@ define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
|
|||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into the first element.
|
||||
define <2 x double> @f26(<2 x double> %val, double *%ptr) {
|
||||
; CHECK-LABEL: f26:
|
||||
; CHECK: vleg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into the last element.
|
||||
define <2 x double> @f27(<2 x double> %val, double *%ptr) {
|
||||
; CHECK-LABEL: f27:
|
||||
; CHECK: vleg %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion with the highest in-range offset.
|
||||
define <2 x double> @f28(<2 x double> %val, double *%base) {
|
||||
; CHECK-LABEL: f28:
|
||||
; CHECK: vleg %v24, 4088(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i32 511
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion with the first out-of-range offset.
|
||||
define <2 x double> @f29(<2 x double> %val, double *%base) {
|
||||
; CHECK-LABEL: f29:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vleg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i32 512
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into a variable element.
|
||||
define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f30:
|
||||
; CHECK-NOT: vleg
|
||||
; CHECK: br %r14
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 %index
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a v4i32 gather of the first element.
|
||||
define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
|
||||
; CHECK-LABEL: f31:
|
||||
|
@ -282,3 +335,29 @@ define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
|
|||
%ret = insertelement <2 x i64> %val, i64 %element, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 gather of the first element.
|
||||
define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) {
|
||||
; CHECK-LABEL: f38:
|
||||
; CHECK: vgeg %v24, 0(%v26,%r2), 0
|
||||
; CHECK: br %r14
|
||||
%elem = extractelement <2 x i64> %index, i32 0
|
||||
%add = add i64 %base, %elem
|
||||
%ptr = inttoptr i64 %add to double *
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 gather of the last element.
|
||||
define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
|
||||
; CHECK-LABEL: f39:
|
||||
; CHECK: vgeg %v24, 0(%v26,%r2), 1
|
||||
; CHECK: br %r14
|
||||
%elem = extractelement <2 x i64> %index, i32 1
|
||||
%add = add i64 %base, %elem
|
||||
%ptr = inttoptr i64 %add to double *
|
||||
%element = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> %val, double %element, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -235,3 +235,30 @@ define <2 x i64> @f26(<2 x i64> %val, i32 %index) {
|
|||
%ret = insertelement <2 x i64> %val, i64 0, i32 %index
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion of 0 into the first element.
|
||||
define <2 x double> @f30(<2 x double> %val) {
|
||||
; CHECK-LABEL: f30:
|
||||
; CHECK: vleig %v24, 0, 0
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> %val, double 0.0, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion of 0 into the last element.
|
||||
define <2 x double> @f31(<2 x double> %val) {
|
||||
; CHECK-LABEL: f31:
|
||||
; CHECK: vleig %v24, 0, 1
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> %val, double 0.0, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion of a nonzero value.
|
||||
define <2 x double> @f32(<2 x double> %val) {
|
||||
; CHECK-LABEL: f32:
|
||||
; CHECK-NOT: vleig
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> %val, double 1.0, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -258,6 +258,59 @@ define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test v2f64 extraction from the first element.
|
||||
define void @f32(<2 x double> %val, double *%ptr) {
|
||||
; CHECK-LABEL: f32:
|
||||
; CHECK: vsteg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x double> %val, i32 0
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2f64 extraction from the last element.
|
||||
define void @f33(<2 x double> %val, double *%ptr) {
|
||||
; CHECK-LABEL: f33:
|
||||
; CHECK: vsteg %v24, 0(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x double> %val, i32 1
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2f64 extraction with the highest in-range offset.
|
||||
define void @f34(<2 x double> %val, double *%base) {
|
||||
; CHECK-LABEL: f34:
|
||||
; CHECK: vsteg %v24, 4088(%r2), 1
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i32 511
|
||||
%element = extractelement <2 x double> %val, i32 1
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2f64 extraction with the first out-of-range offset.
|
||||
define void @f35(<2 x double> %val, double *%base) {
|
||||
; CHECK-LABEL: f35:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vsteg %v24, 0(%r2), 0
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i32 512
|
||||
%element = extractelement <2 x double> %val, i32 0
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test v2f64 extraction from a variable element.
|
||||
define void @f36(<2 x double> %val, double *%ptr, i32 %index) {
|
||||
; CHECK-LABEL: f36:
|
||||
; CHECK-NOT: vsteg
|
||||
; CHECK: br %r14
|
||||
%element = extractelement <2 x double> %val, i32 %index
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a v4i32 scatter of the first element.
|
||||
define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) {
|
||||
; CHECK-LABEL: f37:
|
||||
|
@ -326,3 +379,29 @@ define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) {
|
|||
store i64 %element, i64 *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a v2f64 scatter of the first element.
|
||||
define void @f44(<2 x double> %val, <2 x i64> %index, i64 %base) {
|
||||
; CHECK-LABEL: f44:
|
||||
; CHECK: vsceg %v24, 0(%v26,%r2), 0
|
||||
; CHECK: br %r14
|
||||
%elem = extractelement <2 x i64> %index, i32 0
|
||||
%add = add i64 %base, %elem
|
||||
%ptr = inttoptr i64 %add to double *
|
||||
%element = extractelement <2 x double> %val, i32 0
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a v2f64 scatter of the last element.
|
||||
define void @f45(<2 x double> %val, <2 x i64> %index, i64 %base) {
|
||||
; CHECK-LABEL: f45:
|
||||
; CHECK: vsceg %v24, 0(%v26,%r2), 1
|
||||
; CHECK: br %r14
|
||||
%elem = extractelement <2 x i64> %index, i32 1
|
||||
%add = add i64 %base, %elem
|
||||
%ptr = inttoptr i64 %add to double *
|
||||
%element = extractelement <2 x double> %val, i32 1
|
||||
store double %element, double *%ptr
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -91,3 +91,12 @@ define <2 x i64> @f10(i64 %val) {
|
|||
%ret = insertelement <2 x i64> undef, i64 %val, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into an undef.
|
||||
define <2 x double> @f12(double %val) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: vrepg %v24, %v0, 0
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> undef, double %val, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -101,3 +101,13 @@ define <2 x i64> @f10(i64 *%ptr) {
|
|||
%ret = insertelement <2 x i64> undef, i64 %val, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into an undef.
|
||||
define <2 x double> @f12(double *%ptr) {
|
||||
; CHECK-LABEL: f12:
|
||||
; CHECK: vlrepg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> undef, double %val, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -45,3 +45,13 @@ define <2 x i64> @f4(i64 %val) {
|
|||
%ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 insertion into 0.
|
||||
define <2 x double> @f6(double %val) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vgbm [[REG:%v[0-9]+]], 0
|
||||
; CHECK: vmrhg %v24, [[REG]], %v0
|
||||
; CHECK: br %r14
|
||||
%ret = insertelement <2 x double> zeroinitializer, double %val, i32 1
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -74,3 +74,13 @@ define <2 x i64> @f7(i64 *%ptr) {
|
|||
%ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test VLLEZG with a double.
|
||||
define <2 x double> @f9(double *%ptr) {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: vllezg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%val = load double, double *%ptr
|
||||
%ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -37,3 +37,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
|
|||
%ret = mul <2 x i64> %val1, %val2
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 multiplication.
|
||||
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vfmdb %v24, %v26, %v28
|
||||
; CHECK: br %r14
|
||||
%ret = fmul <2 x double> %val1, %val2
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
; Test a v16i8 multiply-and-add.
|
||||
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2,
|
||||
<16 x i8> %val3) {
|
||||
|
@ -34,3 +36,28 @@ define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2,
|
|||
%ret = add <4 x i32> %mul, %val3
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 multiply-and-add.
|
||||
define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2, <2 x double> %val3) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vfmadb %v24, %v26, %v28, %v30
|
||||
; CHECK: br %r14
|
||||
%ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
|
||||
<2 x double> %val2,
|
||||
<2 x double> %val3)
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 multiply-and-subtract.
|
||||
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2, <2 x double> %val3) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vfmsdb %v24, %v26, %v28, %v30
|
||||
; CHECK: br %r14
|
||||
%negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
|
||||
%ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
|
||||
<2 x double> %val2,
|
||||
<2 x double> %negval3)
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -37,3 +37,12 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) {
|
|||
%ret = sub <2 x i64> zeroinitializer, %val
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 negation.
|
||||
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vflcdb %v24, %v26
|
||||
; CHECK: br %r14
|
||||
%ret = fsub <2 x double> <double -0.0, double -0.0>, %val
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -122,3 +122,23 @@ define <2 x i64> @f11(<2 x i64> %val) {
|
|||
<2 x i32> <i32 1, i32 1>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 splat of the first element.
|
||||
define <2 x double> @f15(<2 x double> %val) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: vrepg %v24, %v24, 0
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 splat of the last element.
|
||||
define <2 x double> @f16(<2 x double> %val) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vrepg %v24, %v24, 1
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> <i32 1, i32 1>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -142,3 +142,25 @@ define <2 x i64> @f11(i64 %scalar) {
|
|||
<2 x i32> <i32 1, i32 1>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 splat of the first element.
|
||||
define <2 x double> @f15(double %scalar) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: vrepg %v24, %v0, 0
|
||||
; CHECK: br %r14
|
||||
%val = insertelement <2 x double> undef, double %scalar, i32 0
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test v2f64 splat of the last element.
|
||||
define <2 x double> @f16(double %scalar) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vrepg %v24, %v0, 0
|
||||
; CHECK: br %r14
|
||||
%val = insertelement <2 x double> undef, double %scalar, i32 1
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> <i32 1, i32 1>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -158,6 +158,46 @@ define <2 x i64> @f12(i64 *%base) {
|
|||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
|
||||
; Test a v2f64 replicating load with no offset.
|
||||
define <2 x double> @f16(double *%ptr) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vlrepg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%scalar = load double, double *%ptr
|
||||
%val = insertelement <2 x double> undef, double %scalar, i32 0
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 replicating load with the maximum in-range offset.
|
||||
define <2 x double> @f17(double *%base) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: vlrepg %v24, 4088(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i32 511
|
||||
%scalar = load double, double *%ptr
|
||||
%val = insertelement <2 x double> undef, double %scalar, i32 0
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 replicating load with the first out-of-range offset.
|
||||
define <2 x double> @f18(double *%base) {
|
||||
; CHECK-LABEL: f18:
|
||||
; CHECK: aghi %r2, 4096
|
||||
; CHECK: vlrepg %v24, 0(%r2)
|
||||
; CHECK: br %r14
|
||||
%ptr = getelementptr double, double *%base, i32 512
|
||||
%scalar = load double, double *%ptr
|
||||
%val = insertelement <2 x double> undef, double %scalar, i32 0
|
||||
%ret = shufflevector <2 x double> %val, <2 x double> undef,
|
||||
<2 x i32> zeroinitializer
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a v16i8 replicating load with an index.
|
||||
define <16 x i8> @f19(i8 *%base, i64 %index) {
|
||||
; CHECK-LABEL: f19:
|
||||
|
|
|
@ -158,3 +158,23 @@ define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
|
|||
<2 x i32> <i32 2, i32 0>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a canonical v2f64 merge high.
|
||||
define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vmrhg %v24, %v24, %v26
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val1, <2 x double> %val2,
|
||||
<2 x i32> <i32 0, i32 2>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a reversed v2f64 merge high.
|
||||
define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: vmrhg %v24, %v26, %v24
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val1, <2 x double> %val2,
|
||||
<2 x i32> <i32 2, i32 0>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -158,3 +158,23 @@ define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
|
|||
<2 x i32> <i32 3, i32 1>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a canonical v2f64 merge low.
|
||||
define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f16:
|
||||
; CHECK: vmrlg %v24, %v24, %v26
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val1, <2 x double> %val2,
|
||||
<2 x i32> <i32 1, i32 3>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a reversed v2f64 merge low.
|
||||
define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: vmrlg %v24, %v26, %v24
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val1, <2 x double> %val2,
|
||||
<2 x i32> <i32 3, i32 1>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -128,3 +128,23 @@ define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) {
|
|||
<2 x i32> <i32 3, i32 0>
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a high1/low2 permute for v2f64.
|
||||
define <2 x double> @f14(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: vpdi %v24, %v24, %v26, 1
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val1, <2 x double> %val2,
|
||||
<2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
||||
; Test a low2/high1 permute for v2f64.
|
||||
define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2) {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: vpdi %v24, %v26, %v24, 4
|
||||
; CHECK: br %r14
|
||||
%ret = shufflevector <2 x double> %val1, <2 x double> %val2,
|
||||
<2 x i32> <i32 3, i32 0>
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
; Test v2f64 rounding.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
|
||||
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
|
||||
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
|
||||
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
|
||||
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
|
||||
declare <2 x double> @llvm.round.v2f64(<2 x double>)
|
||||
|
||||
; Test llvm.rint on v2f64, which should use VFIDB with trailing operands 0, 0.
define <2 x double> @f1(<2 x double> %val) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vfidb %v24, %v24, 0, 0
|
||||
; CHECK: br %r14
|
||||
%res = call <2 x double> @llvm.rint.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Test llvm.nearbyint on v2f64, which should use VFIDB with trailing
; operands 4, 0.
define <2 x double> @f2(<2 x double> %val) {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: vfidb %v24, %v24, 4, 0
|
||||
; CHECK: br %r14
|
||||
%res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Test llvm.floor on v2f64, which should use VFIDB with trailing operands 4, 7.
define <2 x double> @f3(<2 x double> %val) {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: vfidb %v24, %v24, 4, 7
|
||||
; CHECK: br %r14
|
||||
%res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Test llvm.ceil on v2f64, which should use VFIDB with trailing operands 4, 6.
define <2 x double> @f4(<2 x double> %val) {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: vfidb %v24, %v24, 4, 6
|
||||
; CHECK: br %r14
|
||||
%res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Test llvm.trunc on v2f64, which should use VFIDB with trailing operands 4, 5.
define <2 x double> @f5(<2 x double> %val) {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: vfidb %v24, %v24, 4, 5
|
||||
; CHECK: br %r14
|
||||
%res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Test llvm.round on v2f64, which should use VFIDB with trailing operands 4, 1.
define <2 x double> @f6(<2 x double> %val) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vfidb %v24, %v24, 4, 1
|
||||
; CHECK: br %r14
|
||||
%res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %res
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
; Test v2f64 square root.
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
|
||||
|
||||
; Test llvm.sqrt on v2f64, which should be a single VFSQDB from %v24 to %v24.
define <2 x double> @f1(<2 x double> %val) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: vfsqdb %v24, %v24
|
||||
; CHECK: br %r14
|
||||
%ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
|
||||
ret <2 x double> %ret
|
||||
}
|
|
@ -37,3 +37,13 @@ define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
|
|||
%ret = sub <2 x i64> %val1, %val2
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
; Test a v2f64 subtraction.
|
||||
define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
|
||||
<2 x double> %val2) {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: vfsdb %v24, %v26, %v28
|
||||
; CHECK: br %r14
|
||||
%ret = fsub <2 x double> %val1, %val2
|
||||
ret <2 x double> %ret
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue