forked from OSchip/llvm-project
[SystemZ] Add support for IBM z14 processor (2/3)
This adds support for the new 32-bit vector float instructions of z14. This includes:
- Enabling the instructions for the assembler/disassembler.
- CodeGen for the instructions, including new LLVM intrinsics.
- Scheduler description support for the instructions.
- Update to the vector cost function calculations.

In general, CodeGen support for the new v4f32 instructions closely matches support for the existing v2f64 instructions.

llvm-svn: 308195
This commit is contained in:
parent
2b3482fe85
commit
33435c4c9c
|
@ -389,6 +389,22 @@ let TargetPrefix = "s390" in {
|
||||||
def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty],
|
def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty],
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
|
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty],
|
||||||
|
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
|
||||||
|
[IntrNoMem]>;
|
||||||
|
def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty],
|
||||||
|
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
|
||||||
|
[IntrNoMem]>;
|
||||||
|
|
||||||
|
def int_s390_vfcesbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
|
||||||
|
def int_s390_vfchsbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
|
||||||
|
def int_s390_vfchesbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
|
||||||
|
|
||||||
|
def int_s390_vftcisb : SystemZBinaryConvIntCC<llvm_v4i32_ty, llvm_v4f32_ty>;
|
||||||
|
|
||||||
|
def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty],
|
||||||
|
[llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||||
|
[IntrNoMem]>;
|
||||||
|
|
||||||
// Instructions from the Vector Packed Decimal Facility
|
// Instructions from the Vector Packed Decimal Facility
|
||||||
def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">,
|
def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">,
|
||||||
|
|
|
@ -419,6 +419,21 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
|
||||||
|
|
||||||
// The vector enhancements facility 1 has instructions for these.
|
// The vector enhancements facility 1 has instructions for these.
|
||||||
if (Subtarget.hasVectorEnhancements1()) {
|
if (Subtarget.hasVectorEnhancements1()) {
|
||||||
|
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
|
||||||
|
|
||||||
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
|
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
|
||||||
setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
|
setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
|
||||||
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
|
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
|
||||||
|
@ -428,6 +443,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
|
||||||
setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
|
setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
|
||||||
setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
|
setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
|
||||||
setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);
|
setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
|
||||||
|
setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
|
||||||
|
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
|
||||||
|
setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
|
||||||
|
setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We have fused multiply-addition for f32 and f64 but not f128.
|
// We have fused multiply-addition for f32 and f64 but not f128.
|
||||||
|
@ -1478,21 +1503,25 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case Intrinsic::s390_vfcedbs:
|
case Intrinsic::s390_vfcedbs:
|
||||||
|
case Intrinsic::s390_vfcesbs:
|
||||||
Opcode = SystemZISD::VFCMPES;
|
Opcode = SystemZISD::VFCMPES;
|
||||||
CCValid = SystemZ::CCMASK_VCMP;
|
CCValid = SystemZ::CCMASK_VCMP;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case Intrinsic::s390_vfchdbs:
|
case Intrinsic::s390_vfchdbs:
|
||||||
|
case Intrinsic::s390_vfchsbs:
|
||||||
Opcode = SystemZISD::VFCMPHS;
|
Opcode = SystemZISD::VFCMPHS;
|
||||||
CCValid = SystemZ::CCMASK_VCMP;
|
CCValid = SystemZ::CCMASK_VCMP;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case Intrinsic::s390_vfchedbs:
|
case Intrinsic::s390_vfchedbs:
|
||||||
|
case Intrinsic::s390_vfchesbs:
|
||||||
Opcode = SystemZISD::VFCMPHES;
|
Opcode = SystemZISD::VFCMPHES;
|
||||||
CCValid = SystemZ::CCMASK_VCMP;
|
CCValid = SystemZ::CCMASK_VCMP;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
case Intrinsic::s390_vftcidb:
|
case Intrinsic::s390_vftcidb:
|
||||||
|
case Intrinsic::s390_vftcisb:
|
||||||
Opcode = SystemZISD::VFTCI;
|
Opcode = SystemZISD::VFTCI;
|
||||||
CCValid = SystemZ::CCMASK_VCMP;
|
CCValid = SystemZ::CCMASK_VCMP;
|
||||||
return true;
|
return true;
|
||||||
|
@ -2332,11 +2361,15 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
|
||||||
|
|
||||||
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
|
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
|
||||||
// producing a result of type VT.
|
// producing a result of type VT.
|
||||||
static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
|
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
|
||||||
EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
|
const SDLoc &DL, EVT VT,
|
||||||
// There is no hardware support for v4f32, so extend the vector into
|
SDValue CmpOp0,
|
||||||
// two v2f64s and compare those.
|
SDValue CmpOp1) const {
|
||||||
if (CmpOp0.getValueType() == MVT::v4f32) {
|
// There is no hardware support for v4f32 (unless we have the vector
|
||||||
|
// enhancements facility 1), so extend the vector into two v2f64s
|
||||||
|
// and compare those.
|
||||||
|
if (CmpOp0.getValueType() == MVT::v4f32 &&
|
||||||
|
!Subtarget.hasVectorEnhancements1()) {
|
||||||
SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
|
SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
|
||||||
SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
|
SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
|
||||||
SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
|
SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
|
||||||
|
@ -2350,9 +2383,11 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
|
||||||
|
|
||||||
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
|
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
|
||||||
// an integer mask of type VT.
|
// an integer mask of type VT.
|
||||||
static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
|
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
|
||||||
ISD::CondCode CC, SDValue CmpOp0,
|
const SDLoc &DL, EVT VT,
|
||||||
SDValue CmpOp1) {
|
ISD::CondCode CC,
|
||||||
|
SDValue CmpOp0,
|
||||||
|
SDValue CmpOp1) const {
|
||||||
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
|
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
|
||||||
bool Invert = false;
|
bool Invert = false;
|
||||||
SDValue Cmp;
|
SDValue Cmp;
|
||||||
|
|
|
@ -480,6 +480,12 @@ private:
|
||||||
const SystemZSubtarget &Subtarget;
|
const SystemZSubtarget &Subtarget;
|
||||||
|
|
||||||
// Implement LowerOperation for individual opcodes.
|
// Implement LowerOperation for individual opcodes.
|
||||||
|
SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
|
||||||
|
const SDLoc &DL, EVT VT,
|
||||||
|
SDValue CmpOp0, SDValue CmpOp1) const;
|
||||||
|
SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL,
|
||||||
|
EVT VT, ISD::CondCode CC,
|
||||||
|
SDValue CmpOp0, SDValue CmpOp1) const;
|
||||||
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
let Predicates = [FeatureVector] in {
|
let Predicates = [FeatureVector] in {
|
||||||
// Register move.
|
// Register move.
|
||||||
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
|
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
|
||||||
def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
|
def VLR32 : UnaryAliasVRR<null_frag, v32sb, v32sb>;
|
||||||
def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
|
def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
|
||||||
|
|
||||||
// Load GR from VR element.
|
// Load GR from VR element.
|
||||||
|
@ -141,7 +141,7 @@ let Predicates = [FeatureVector] in {
|
||||||
// LEY and LDY offer full 20-bit displacement fields. It's often better
|
// LEY and LDY offer full 20-bit displacement fields. It's often better
|
||||||
// to use those instructions rather than force a 20-bit displacement
|
// to use those instructions rather than force a 20-bit displacement
|
||||||
// into a GPR temporary.
|
// into a GPR temporary.
|
||||||
def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
|
def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>;
|
||||||
def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
|
def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
|
||||||
|
|
||||||
// Load logical element and zero.
|
// Load logical element and zero.
|
||||||
|
@ -231,7 +231,7 @@ let Predicates = [FeatureVector] in {
|
||||||
// STEY and STDY offer full 20-bit displacement fields. It's often better
|
// STEY and STDY offer full 20-bit displacement fields. It's often better
|
||||||
// to use those instructions rather than force a 20-bit displacement
|
// to use those instructions rather than force a 20-bit displacement
|
||||||
// into a GPR temporary.
|
// into a GPR temporary.
|
||||||
def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
|
def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>;
|
||||||
def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
|
def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
|
||||||
|
|
||||||
// Scatter element.
|
// Scatter element.
|
||||||
|
@ -935,6 +935,10 @@ let Predicates = [FeatureVector] in {
|
||||||
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
|
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
|
||||||
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
|
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
|
||||||
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
|
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Convert from fixed 64-bit.
|
// Convert from fixed 64-bit.
|
||||||
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
|
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
|
||||||
|
@ -966,6 +970,10 @@ let Predicates = [FeatureVector] in {
|
||||||
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
|
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
|
||||||
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
|
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
|
||||||
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
|
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Load FP integer.
|
// Load FP integer.
|
||||||
def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
|
def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
|
||||||
|
@ -973,18 +981,38 @@ let Predicates = [FeatureVector] in {
|
||||||
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
|
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
|
||||||
defm : VectorRounding<VFIDB, v128db>;
|
defm : VectorRounding<VFIDB, v128db>;
|
||||||
defm : VectorRounding<WFIDB, v64db>;
|
defm : VectorRounding<WFIDB, v64db>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
|
||||||
|
defm : VectorRounding<VFISB, v128sb>;
|
||||||
|
defm : VectorRounding<WFISB, v32sb>;
|
||||||
|
}
|
||||||
|
|
||||||
// Load lengthened.
|
// Load lengthened.
|
||||||
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
|
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
|
||||||
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
|
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
|
||||||
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>;
|
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
let isAsmParserOnly = 1 in {
|
||||||
|
def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
|
||||||
|
def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
|
||||||
|
def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Load rounded,
|
// Load rounded.
|
||||||
def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
|
def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
|
||||||
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
|
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
|
||||||
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
|
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
|
||||||
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
|
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
|
||||||
def : FPConversion<WLEDB, fpround, v32eb, v64db, 0, 0>;
|
def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
let isAsmParserOnly = 1 in {
|
||||||
|
def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
|
||||||
|
def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
|
||||||
|
def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Maximum.
|
// Maximum.
|
||||||
multiclass VectorMax<Instruction insn, TypedReg tr> {
|
multiclass VectorMax<Instruction insn, TypedReg tr> {
|
||||||
|
@ -997,8 +1025,14 @@ let Predicates = [FeatureVector] in {
|
||||||
v128db, v128db, 3, 0>;
|
v128db, v128db, 3, 0>;
|
||||||
def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
|
def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
|
||||||
v64db, v64db, 3, 8>;
|
v64db, v64db, 3, 8>;
|
||||||
|
def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
|
||||||
|
v128sb, v128sb, 2, 0>;
|
||||||
|
def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
|
||||||
|
v32sb, v32sb, 2, 8>;
|
||||||
defm : VectorMax<VFMAXDB, v128db>;
|
defm : VectorMax<VFMAXDB, v128db>;
|
||||||
defm : VectorMax<WFMAXDB, v64db>;
|
defm : VectorMax<WFMAXDB, v64db>;
|
||||||
|
defm : VectorMax<VFMAXSB, v128sb>;
|
||||||
|
defm : VectorMax<WFMAXSB, v32sb>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Minimum.
|
// Minimum.
|
||||||
|
@ -1012,30 +1046,50 @@ let Predicates = [FeatureVector] in {
|
||||||
v128db, v128db, 3, 0>;
|
v128db, v128db, 3, 0>;
|
||||||
def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
|
def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
|
||||||
v64db, v64db, 3, 8>;
|
v64db, v64db, 3, 8>;
|
||||||
|
def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
|
||||||
|
v128sb, v128sb, 2, 0>;
|
||||||
|
def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
|
||||||
|
v32sb, v32sb, 2, 8>;
|
||||||
defm : VectorMin<VFMINDB, v128db>;
|
defm : VectorMin<VFMINDB, v128db>;
|
||||||
defm : VectorMin<WFMINDB, v64db>;
|
defm : VectorMin<WFMINDB, v64db>;
|
||||||
|
defm : VectorMin<VFMINSB, v128sb>;
|
||||||
|
defm : VectorMin<WFMINSB, v32sb>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multiply.
|
// Multiply.
|
||||||
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
|
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
|
||||||
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
|
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
|
||||||
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
|
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Multiply and add.
|
// Multiply and add.
|
||||||
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
|
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
|
||||||
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
|
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
|
||||||
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
|
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
|
||||||
|
def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
|
||||||
|
}
|
||||||
|
|
||||||
// Multiply and subtract.
|
// Multiply and subtract.
|
||||||
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
|
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
|
||||||
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
|
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
|
||||||
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
|
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
|
||||||
|
def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
|
||||||
|
}
|
||||||
|
|
||||||
// Negative multiply and add.
|
// Negative multiply and add.
|
||||||
let Predicates = [FeatureVectorEnhancements1] in {
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
|
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
|
||||||
def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
|
def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
|
||||||
def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
|
def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
|
||||||
|
def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
|
||||||
|
def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Negative multiply and subtract.
|
// Negative multiply and subtract.
|
||||||
|
@ -1043,40 +1097,70 @@ let Predicates = [FeatureVector] in {
|
||||||
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
|
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
|
||||||
def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
|
def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
|
||||||
def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
|
def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
|
||||||
|
def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
|
||||||
|
def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform sign operation.
|
// Perform sign operation.
|
||||||
def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>;
|
def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>;
|
||||||
def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>;
|
def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>;
|
||||||
def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>;
|
def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Load complement.
|
// Load complement.
|
||||||
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
|
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
|
||||||
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
|
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>;
|
||||||
|
def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>;
|
||||||
|
}
|
||||||
|
|
||||||
// Load negative.
|
// Load negative.
|
||||||
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
|
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
|
||||||
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
|
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>;
|
||||||
|
def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>;
|
||||||
|
}
|
||||||
|
|
||||||
// Load positive.
|
// Load positive.
|
||||||
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
|
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
|
||||||
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
|
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>;
|
||||||
|
def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>;
|
||||||
|
}
|
||||||
|
|
||||||
// Square root.
|
// Square root.
|
||||||
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
|
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
|
||||||
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
|
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
|
||||||
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
|
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Subtract.
|
// Subtract.
|
||||||
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
|
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
|
||||||
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
|
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
|
||||||
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
|
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
|
||||||
|
def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Test data class immediate.
|
// Test data class immediate.
|
||||||
let Defs = [CC] in {
|
let Defs = [CC] in {
|
||||||
def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>;
|
def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>;
|
||||||
def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
|
def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
|
||||||
def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
|
def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>;
|
||||||
|
def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1089,12 +1173,18 @@ let Predicates = [FeatureVector] in {
|
||||||
let Defs = [CC] in {
|
let Defs = [CC] in {
|
||||||
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
|
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
|
||||||
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
|
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compare and signal scalar.
|
// Compare and signal scalar.
|
||||||
let Defs = [CC] in {
|
let Defs = [CC] in {
|
||||||
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
|
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
|
||||||
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
|
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compare equal.
|
// Compare equal.
|
||||||
|
@ -1103,6 +1193,12 @@ let Predicates = [FeatureVector] in {
|
||||||
v128g, v128db, 3, 0>;
|
v128g, v128db, 3, 0>;
|
||||||
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
|
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
|
||||||
v64g, v64db, 3, 8>;
|
v64g, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
|
||||||
|
v128f, v128sb, 2, 0>;
|
||||||
|
defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
|
||||||
|
v32f, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Compare and signal equal.
|
// Compare and signal equal.
|
||||||
let Predicates = [FeatureVectorEnhancements1] in {
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
@ -1110,6 +1206,10 @@ let Predicates = [FeatureVector] in {
|
||||||
v128g, v128db, 3, 4>;
|
v128g, v128db, 3, 4>;
|
||||||
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
|
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
|
||||||
v64g, v64db, 3, 12>;
|
v64g, v64db, 3, 12>;
|
||||||
|
defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag,
|
||||||
|
v128f, v128sb, 2, 4>;
|
||||||
|
defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag,
|
||||||
|
v32f, v32sb, 2, 12>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compare high.
|
// Compare high.
|
||||||
|
@ -1118,6 +1218,12 @@ let Predicates = [FeatureVector] in {
|
||||||
v128g, v128db, 3, 0>;
|
v128g, v128db, 3, 0>;
|
||||||
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
|
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
|
||||||
v64g, v64db, 3, 8>;
|
v64g, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
|
||||||
|
v128f, v128sb, 2, 0>;
|
||||||
|
defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
|
||||||
|
v32f, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Compare and signal high.
|
// Compare and signal high.
|
||||||
let Predicates = [FeatureVectorEnhancements1] in {
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
@ -1125,6 +1231,10 @@ let Predicates = [FeatureVector] in {
|
||||||
v128g, v128db, 3, 4>;
|
v128g, v128db, 3, 4>;
|
||||||
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
|
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
|
||||||
v64g, v64db, 3, 12>;
|
v64g, v64db, 3, 12>;
|
||||||
|
defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag,
|
||||||
|
v128f, v128sb, 2, 4>;
|
||||||
|
defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag,
|
||||||
|
v32f, v32sb, 2, 12>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compare high or equal.
|
// Compare high or equal.
|
||||||
|
@ -1133,6 +1243,12 @@ let Predicates = [FeatureVector] in {
|
||||||
v128g, v128db, 3, 0>;
|
v128g, v128db, 3, 0>;
|
||||||
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
|
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
|
||||||
v64g, v64db, 3, 8>;
|
v64g, v64db, 3, 8>;
|
||||||
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
|
||||||
|
v128f, v128sb, 2, 0>;
|
||||||
|
defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
|
||||||
|
v32f, v32sb, 2, 8>;
|
||||||
|
}
|
||||||
|
|
||||||
// Compare and signal high or equal.
|
// Compare and signal high or equal.
|
||||||
let Predicates = [FeatureVectorEnhancements1] in {
|
let Predicates = [FeatureVectorEnhancements1] in {
|
||||||
|
@ -1140,6 +1256,10 @@ let Predicates = [FeatureVector] in {
|
||||||
v128g, v128db, 3, 4>;
|
v128g, v128db, 3, 4>;
|
||||||
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
|
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
|
||||||
v64g, v64db, 3, 12>;
|
v64g, v64db, 3, 12>;
|
||||||
|
defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag,
|
||||||
|
v128f, v128sb, 2, 4>;
|
||||||
|
defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag,
|
||||||
|
v32f, v32sb, 2, 12>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,7 +272,8 @@ class TypedReg<ValueType vtin, RegisterOperand opin> {
|
||||||
RegisterOperand op = opin;
|
RegisterOperand op = opin;
|
||||||
}
|
}
|
||||||
|
|
||||||
def v32eb : TypedReg<f32, VR32>;
|
def v32f : TypedReg<i32, VR32>;
|
||||||
|
def v32sb : TypedReg<f32, VR32>;
|
||||||
def v64g : TypedReg<i64, VR64>;
|
def v64g : TypedReg<i64, VR64>;
|
||||||
def v64db : TypedReg<f64, VR64>;
|
def v64db : TypedReg<f64, VR64>;
|
||||||
def v128b : TypedReg<v16i8, VR128>;
|
def v128b : TypedReg<v16i8, VR128>;
|
||||||
|
@ -280,7 +281,7 @@ def v128h : TypedReg<v8i16, VR128>;
|
||||||
def v128f : TypedReg<v4i32, VR128>;
|
def v128f : TypedReg<v4i32, VR128>;
|
||||||
def v128g : TypedReg<v2i64, VR128>;
|
def v128g : TypedReg<v2i64, VR128>;
|
||||||
def v128q : TypedReg<v16i8, VR128>;
|
def v128q : TypedReg<v16i8, VR128>;
|
||||||
def v128eb : TypedReg<v4f32, VR128>;
|
def v128sb : TypedReg<v4f32, VR128>;
|
||||||
def v128db : TypedReg<v2f64, VR128>;
|
def v128db : TypedReg<v2f64, VR128>;
|
||||||
def v128any : TypedReg<untyped, VR128>;
|
def v128any : TypedReg<untyped, VR128>;
|
||||||
|
|
||||||
|
|
|
@ -1316,42 +1316,60 @@ def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>;
|
||||||
def : InstRW<[VecBF], (instregex "VL(DE|ED)$")>;
|
def : InstRW<[VecBF], (instregex "VL(DE|ED)$")>;
|
||||||
def : InstRW<[VecBF], (instregex "VL(DE|ED)B$")>;
|
def : InstRW<[VecBF], (instregex "VL(DE|ED)B$")>;
|
||||||
def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>;
|
def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "VFL(L|R)$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>;
|
||||||
def : InstRW<[VecBF2], (instregex "VFI$")>;
|
def : InstRW<[VecBF2], (instregex "VFI$")>;
|
||||||
def : InstRW<[VecBF], (instregex "VFIDB$")>;
|
def : InstRW<[VecBF], (instregex "VFIDB$")>;
|
||||||
def : InstRW<[VecBF], (instregex "WFIDB$")>;
|
def : InstRW<[VecBF], (instregex "WFIDB$")>;
|
||||||
|
def : InstRW<[VecBF2], (instregex "VFISB$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "WFISB$")>;
|
||||||
|
|
||||||
// Sign operations
|
// Sign operations
|
||||||
def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
|
def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
|
def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
|
||||||
|
def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>;
|
def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>;
|
||||||
|
def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>;
|
||||||
|
|
||||||
// Minimum / maximum
|
// Minimum / maximum
|
||||||
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>;
|
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>;
|
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>;
|
def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>;
|
||||||
|
def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>;
|
||||||
|
def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>;
|
||||||
|
|
||||||
// Test data class
|
// Test data class
|
||||||
def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>;
|
def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>;
|
||||||
def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>;
|
def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>;
|
||||||
|
def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>;
|
||||||
|
|
||||||
// Add / subtract
|
// Add / subtract
|
||||||
def : InstRW<[VecBF2], (instregex "VF(A|S)$")>;
|
def : InstRW<[VecBF2], (instregex "VF(A|S)$")>;
|
||||||
def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>;
|
def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>;
|
||||||
def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>;
|
def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>;
|
||||||
|
def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>;
|
||||||
|
|
||||||
// Multiply / multiply-and-add/subtract
|
// Multiply / multiply-and-add/subtract
|
||||||
def : InstRW<[VecBF2], (instregex "VFM$")>;
|
def : InstRW<[VecBF2], (instregex "VFM$")>;
|
||||||
def : InstRW<[VecBF], (instregex "VFMDB$")>;
|
def : InstRW<[VecBF], (instregex "VFMDB$")>;
|
||||||
def : InstRW<[VecBF], (instregex "WFMDB$")>;
|
def : InstRW<[VecBF], (instregex "WFMDB$")>;
|
||||||
|
def : InstRW<[VecBF2], (instregex "VFMSB$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "WFMSB$")>;
|
||||||
def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>;
|
def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>;
|
||||||
def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>;
|
def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>;
|
||||||
def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>;
|
def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>;
|
||||||
|
def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>;
|
||||||
|
def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>;
|
||||||
|
|
||||||
// Divide / square root
|
// Divide / square root
|
||||||
def : InstRW<[VecFPd], (instregex "VFD$")>;
|
def : InstRW<[VecFPd], (instregex "VFD$")>;
|
||||||
def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
|
def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
|
||||||
|
def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>;
|
||||||
def : InstRW<[VecFPd], (instregex "VFSQ$")>;
|
def : InstRW<[VecFPd], (instregex "VFSQ$")>;
|
||||||
def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
|
def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
|
||||||
|
def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Vector: Floating-point comparison
|
// Vector: Floating-point comparison
|
||||||
|
@ -1360,10 +1378,15 @@ def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)$")>;
|
def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>;
|
def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>;
|
||||||
def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>;
|
def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>;
|
||||||
|
def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>;
|
||||||
|
def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>;
|
||||||
def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>;
|
def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>;
|
||||||
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>;
|
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>;
|
||||||
|
def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>;
|
||||||
|
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>;
|
||||||
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
|
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
|
||||||
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
|
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
|
||||||
|
def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Vector: Floating-point insertion and extraction
|
// Vector: Floating-point insertion and extraction
|
||||||
|
|
|
@ -200,14 +200,26 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
|
||||||
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
|
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFASB:
|
||||||
|
Changed |= shortenOn001AddCC(MI, SystemZ::AEBR);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFDDB:
|
case SystemZ::WFDDB:
|
||||||
Changed |= shortenOn001(MI, SystemZ::DDBR);
|
Changed |= shortenOn001(MI, SystemZ::DDBR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFDSB:
|
||||||
|
Changed |= shortenOn001(MI, SystemZ::DEBR);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFIDB:
|
case SystemZ::WFIDB:
|
||||||
Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
|
Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFISB:
|
||||||
|
Changed |= shortenFPConv(MI, SystemZ::FIEBRA);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WLDEB:
|
case SystemZ::WLDEB:
|
||||||
Changed |= shortenOn01(MI, SystemZ::LDEBR);
|
Changed |= shortenOn01(MI, SystemZ::LDEBR);
|
||||||
break;
|
break;
|
||||||
|
@ -220,30 +232,58 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
|
||||||
Changed |= shortenOn001(MI, SystemZ::MDBR);
|
Changed |= shortenOn001(MI, SystemZ::MDBR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFMSB:
|
||||||
|
Changed |= shortenOn001(MI, SystemZ::MEEBR);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFLCDB:
|
case SystemZ::WFLCDB:
|
||||||
Changed |= shortenOn01(MI, SystemZ::LCDFR);
|
Changed |= shortenOn01(MI, SystemZ::LCDFR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFLCSB:
|
||||||
|
Changed |= shortenOn01(MI, SystemZ::LCDFR_32);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFLNDB:
|
case SystemZ::WFLNDB:
|
||||||
Changed |= shortenOn01(MI, SystemZ::LNDFR);
|
Changed |= shortenOn01(MI, SystemZ::LNDFR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFLNSB:
|
||||||
|
Changed |= shortenOn01(MI, SystemZ::LNDFR_32);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFLPDB:
|
case SystemZ::WFLPDB:
|
||||||
Changed |= shortenOn01(MI, SystemZ::LPDFR);
|
Changed |= shortenOn01(MI, SystemZ::LPDFR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFLPSB:
|
||||||
|
Changed |= shortenOn01(MI, SystemZ::LPDFR_32);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFSQDB:
|
case SystemZ::WFSQDB:
|
||||||
Changed |= shortenOn01(MI, SystemZ::SQDBR);
|
Changed |= shortenOn01(MI, SystemZ::SQDBR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFSQSB:
|
||||||
|
Changed |= shortenOn01(MI, SystemZ::SQEBR);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFSDB:
|
case SystemZ::WFSDB:
|
||||||
Changed |= shortenOn001AddCC(MI, SystemZ::SDBR);
|
Changed |= shortenOn001AddCC(MI, SystemZ::SDBR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFSSB:
|
||||||
|
Changed |= shortenOn001AddCC(MI, SystemZ::SEBR);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::WFCDB:
|
case SystemZ::WFCDB:
|
||||||
Changed |= shortenOn01(MI, SystemZ::CDBR);
|
Changed |= shortenOn01(MI, SystemZ::CDBR);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SystemZ::WFCSB:
|
||||||
|
Changed |= shortenOn01(MI, SystemZ::CEBR);
|
||||||
|
break;
|
||||||
|
|
||||||
case SystemZ::VL32:
|
case SystemZ::VL32:
|
||||||
// For z13 we prefer LDE over LE to avoid partial register dependencies.
|
// For z13 we prefer LDE over LE to avoid partial register dependencies.
|
||||||
Changed |= shortenOn0(MI, SystemZ::LDE32);
|
Changed |= shortenOn0(MI, SystemZ::LDE32);
|
||||||
|
|
|
@ -372,6 +372,9 @@ int SystemZTTIImpl::getArithmeticInstrCost(
|
||||||
Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
|
Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
|
||||||
switch (ScalarBits) {
|
switch (ScalarBits) {
|
||||||
case 32: {
|
case 32: {
|
||||||
|
// The vector enhancements facility 1 provides v4f32 instructions.
|
||||||
|
if (ST->hasVectorEnhancements1())
|
||||||
|
return NumVectors;
|
||||||
// Return the cost of multiple scalar invocation plus the cost of
|
// Return the cost of multiple scalar invocation plus the cost of
|
||||||
// inserting and extracting the values.
|
// inserting and extracting the values.
|
||||||
unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
|
unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
|
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-Z13 %s
|
||||||
|
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z14 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-Z14 %s
|
||||||
;
|
;
|
||||||
; Note: The scalarized vector instructions cost is not including any
|
; Note: The scalarized vector instructions cost is not including any
|
||||||
; extracts, due to the undef operands
|
; extracts, due to the undef operands
|
||||||
|
@ -21,13 +24,17 @@ define void @fadd() {
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fadd float undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fadd float undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fadd double undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fadd double undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fadd fp128 undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fadd fp128 undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fadd <2 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fadd <2 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fadd <2 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fadd <2 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fadd <2 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fadd <4 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fadd <4 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fadd <4 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fadd <4 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fadd <4 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fadd <8 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fadd <8 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fadd <8 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fadd <8 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fadd <8 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fadd <16 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fadd <16 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fadd <16 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fadd <16 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fadd <16 x double> undef, undef
|
||||||
|
|
||||||
ret void;
|
ret void;
|
||||||
|
@ -49,13 +56,17 @@ define void @fsub() {
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fsub float undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fsub float undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fsub double undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fsub double undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fsub fp128 undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fsub fp128 undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fsub <2 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fsub <2 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fsub <2 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fsub <2 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fsub <2 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fsub <4 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fsub <4 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fsub <4 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fsub <4 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fsub <4 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fsub <8 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fsub <8 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fsub <8 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fsub <8 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fsub <8 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fsub <16 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fsub <16 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fsub <16 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fsub <16 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fsub <16 x double> undef, undef
|
||||||
|
|
||||||
ret void;
|
ret void;
|
||||||
|
@ -77,13 +88,17 @@ define void @fmul() {
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fmul float undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fmul float undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fmul double undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fmul double undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fmul fp128 undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fmul fp128 undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fmul <2 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fmul <2 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fmul <2 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fmul <2 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fmul <2 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fmul <4 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fmul <4 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fmul <4 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fmul <4 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fmul <4 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fmul <8 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fmul <8 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fmul <8 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fmul <8 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fmul <8 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fmul <16 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fmul <16 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fmul <16 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fmul <16 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fmul <16 x double> undef, undef
|
||||||
|
|
||||||
ret void;
|
ret void;
|
||||||
|
@ -105,13 +120,17 @@ define void @fdiv() {
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fdiv float undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res0 = fdiv float undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fdiv double undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = fdiv double undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fdiv fp128 undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = fdiv fp128 undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fdiv <2 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res3 = fdiv <2 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res3 = fdiv <2 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fdiv <2 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = fdiv <2 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fdiv <4 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 8 for instruction: %res5 = fdiv <4 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 1 for instruction: %res5 = fdiv <4 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fdiv <4 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res6 = fdiv <4 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fdiv <8 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 16 for instruction: %res7 = fdiv <8 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 2 for instruction: %res7 = fdiv <8 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fdiv <8 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res8 = fdiv <8 x double> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fdiv <16 x float> undef, undef
|
; CHECK-Z13: Cost Model: Found an estimated cost of 32 for instruction: %res9 = fdiv <16 x float> undef, undef
|
||||||
|
; CHECK-Z14: Cost Model: Found an estimated cost of 4 for instruction: %res9 = fdiv <16 x float> undef, undef
|
||||||
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fdiv <16 x double> undef, undef
|
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %res10 = fdiv <16 x double> undef, undef
|
||||||
|
|
||||||
ret void;
|
ret void;
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
; Test floating-point absolute on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test f32.
|
||||||
|
declare float @llvm.fabs.f32(float %f)
|
||||||
|
define float @f1(float %f) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: lpdfr %f0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.fabs.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test f64.
|
||||||
|
declare double @llvm.fabs.f64(double %f)
|
||||||
|
define double @f2(double %f) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: lpdfr %f0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.fabs.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test f128. With the loads and stores, a pure absolute would probably
|
||||||
|
; be better implemented using an NI on the upper byte. Do some extra
|
||||||
|
; processing so that using FPRs is unequivocally better.
|
||||||
|
declare fp128 @llvm.fabs.f128(fp128 %f)
|
||||||
|
define void @f3(fp128 *%ptr, fp128 *%ptr2) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: lpxbr
|
||||||
|
; CHECK: dxbr
|
||||||
|
; CHECK: br %r14
|
||||||
|
%orig = load fp128 , fp128 *%ptr
|
||||||
|
%abs = call fp128 @llvm.fabs.f128(fp128 %orig)
|
||||||
|
%op2 = load fp128 , fp128 *%ptr2
|
||||||
|
%res = fdiv fp128 %abs, %op2
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
; Test negated floating-point absolute on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test f32.
|
||||||
|
declare float @llvm.fabs.f32(float %f)
|
||||||
|
define float @f1(float %f) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: lndfr %f0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%abs = call float @llvm.fabs.f32(float %f)
|
||||||
|
%res = fsub float -0.0, %abs
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test f64.
|
||||||
|
declare double @llvm.fabs.f64(double %f)
|
||||||
|
define double @f2(double %f) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: lndfr %f0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%abs = call double @llvm.fabs.f64(double %f)
|
||||||
|
%res = fsub double -0.0, %abs
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test f128. With the loads and stores, a pure negative-absolute would
|
||||||
|
; probably be better implemented using an OI on the upper byte. Do some
|
||||||
|
; extra processing so that using FPRs is unequivocally better.
|
||||||
|
declare fp128 @llvm.fabs.f128(fp128 %f)
|
||||||
|
define void @f3(fp128 *%ptr, fp128 *%ptr2) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: lnxbr
|
||||||
|
; CHECK: dxbr
|
||||||
|
; CHECK: br %r14
|
||||||
|
%orig = load fp128 , fp128 *%ptr
|
||||||
|
%abs = call fp128 @llvm.fabs.f128(fp128 %orig)
|
||||||
|
%negabs = fsub fp128 0xL00000000000000008000000000000000, %abs
|
||||||
|
%op2 = load fp128 , fp128 *%ptr2
|
||||||
|
%res = fdiv fp128 %negabs, %op2
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -1,6 +1,8 @@
|
||||||
; Test 32-bit floating-point addition.
|
; Test 32-bit floating-point addition.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare float @foo()
|
declare float @foo()
|
||||||
|
|
||||||
|
@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) {
|
||||||
define float @f7(float *%ptr0) {
|
define float @f7(float *%ptr0) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: brasl %r14, foo@PLT
|
; CHECK: brasl %r14, foo@PLT
|
||||||
; CHECK: aeb %f0, 16{{[04]}}(%r15)
|
; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
||||||
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
; Test 32-bit floating-point comparison. The tests assume a z10 implementation
|
; Test 32-bit floating-point comparison. The tests assume a z10 implementation
|
||||||
; of select, using conditional branches rather than LOCGR.
|
; of select, using conditional branches rather than LOCGR.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
|
||||||
|
|
||||||
declare float @foo()
|
declare float @foo()
|
||||||
|
|
||||||
|
@ -9,8 +12,9 @@ declare float @foo()
|
||||||
define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) {
|
define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
; CHECK: cebr %f0, %f2
|
; CHECK: cebr %f0, %f2
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%cond = fcmp oeq float %f1, %f2
|
%cond = fcmp oeq float %f1, %f2
|
||||||
%res = select i1 %cond, i64 %a, i64 %b
|
%res = select i1 %cond, i64 %a, i64 %b
|
||||||
|
@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) {
|
||||||
define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
|
define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
|
||||||
; CHECK-LABEL: f2:
|
; CHECK-LABEL: f2:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
%cond = fcmp oeq float %f1, %f2
|
%cond = fcmp oeq float %f1, %f2
|
||||||
|
@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
|
||||||
define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) {
|
define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) {
|
||||||
; CHECK-LABEL: f3:
|
; CHECK-LABEL: f3:
|
||||||
; CHECK: ceb %f0, 4092(%r4)
|
; CHECK: ceb %f0, 4092(%r4)
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 1023
|
%ptr = getelementptr float, float *%base, i64 1023
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) {
|
||||||
; CHECK-LABEL: f4:
|
; CHECK-LABEL: f4:
|
||||||
; CHECK: aghi %r4, 4096
|
; CHECK: aghi %r4, 4096
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 1024
|
%ptr = getelementptr float, float *%base, i64 1024
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) {
|
||||||
; CHECK-LABEL: f5:
|
; CHECK-LABEL: f5:
|
||||||
; CHECK: aghi %r4, -4
|
; CHECK: aghi %r4, -4
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 -1
|
%ptr = getelementptr float, float *%base, i64 -1
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) {
|
||||||
; CHECK-LABEL: f6:
|
; CHECK-LABEL: f6:
|
||||||
; CHECK: sllg %r1, %r5, 2
|
; CHECK: sllg %r1, %r5, 2
|
||||||
; CHECK: ceb %f0, 400(%r1,%r4)
|
; CHECK: ceb %f0, 400(%r1,%r4)
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr1 = getelementptr float, float *%base, i64 %index
|
%ptr1 = getelementptr float, float *%base, i64 %index
|
||||||
%ptr2 = getelementptr float, float *%ptr1, i64 100
|
%ptr2 = getelementptr float, float *%ptr1, i64 100
|
||||||
|
@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) {
|
||||||
define float @f7(float *%ptr0) {
|
define float @f7(float *%ptr0) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: brasl %r14, foo@PLT
|
; CHECK: brasl %r14, foo@PLT
|
||||||
; CHECK: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15)
|
; CHECK-SCALAR: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15)
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
||||||
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
||||||
|
@ -153,8 +162,9 @@ define float @f7(float *%ptr0) {
|
||||||
define i64 @f8(i64 %a, i64 %b, float %f) {
|
define i64 @f8(i64 %a, i64 %b, float %f) {
|
||||||
; CHECK-LABEL: f8:
|
; CHECK-LABEL: f8:
|
||||||
; CHECK: ltebr %f0, %f0
|
; CHECK: ltebr %f0, %f0
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%cond = fcmp oeq float %f, 0.0
|
%cond = fcmp oeq float %f, 0.0
|
||||||
%res = select i1 %cond, i64 %a, i64 %b
|
%res = select i1 %cond, i64 %a, i64 %b
|
||||||
|
@ -166,8 +176,9 @@ define i64 @f8(i64 %a, i64 %b, float %f) {
|
||||||
define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f9:
|
; CHECK-LABEL: f9:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: ber %r14
|
; CHECK-SCALAR-NEXT: ber %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp oeq float %f1, %f2
|
%cond = fcmp oeq float %f1, %f2
|
||||||
|
@ -179,8 +190,9 @@ define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f10:
|
; CHECK-LABEL: f10:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: blhr %r14
|
; CHECK-SCALAR-NEXT: blhr %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp one float %f1, %f2
|
%cond = fcmp one float %f1, %f2
|
||||||
|
@ -192,8 +204,9 @@ define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f11:
|
; CHECK-LABEL: f11:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bhr %r14
|
; CHECK-SCALAR-NEXT: bhr %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrnh %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp olt float %f1, %f2
|
%cond = fcmp olt float %f1, %f2
|
||||||
|
@ -205,8 +218,9 @@ define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f12:
|
; CHECK-LABEL: f12:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bher %r14
|
; CHECK-SCALAR-NEXT: bher %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp ole float %f1, %f2
|
%cond = fcmp ole float %f1, %f2
|
||||||
|
@ -218,8 +232,9 @@ define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f13:
|
; CHECK-LABEL: f13:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bler %r14
|
; CHECK-SCALAR-NEXT: bler %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrnle %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp oge float %f1, %f2
|
%cond = fcmp oge float %f1, %f2
|
||||||
|
@ -231,8 +246,9 @@ define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f14:
|
; CHECK-LABEL: f14:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: blr %r14
|
; CHECK-SCALAR-NEXT: blr %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp ogt float %f1, %f2
|
%cond = fcmp ogt float %f1, %f2
|
||||||
|
@ -244,8 +260,9 @@ define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f15:
|
; CHECK-LABEL: f15:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bnlhr %r14
|
; CHECK-SCALAR-NEXT: bnlhr %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrlh %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp ueq float %f1, %f2
|
%cond = fcmp ueq float %f1, %f2
|
||||||
|
@ -257,8 +274,9 @@ define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f16:
|
; CHECK-LABEL: f16:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bner %r14
|
; CHECK-SCALAR-NEXT: bner %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgre %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp une float %f1, %f2
|
%cond = fcmp une float %f1, %f2
|
||||||
|
@ -270,8 +288,9 @@ define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f17:
|
; CHECK-LABEL: f17:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bnler %r14
|
; CHECK-SCALAR-NEXT: bnler %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrle %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp ult float %f1, %f2
|
%cond = fcmp ult float %f1, %f2
|
||||||
|
@ -283,8 +302,9 @@ define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f18:
|
; CHECK-LABEL: f18:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bnlr %r14
|
; CHECK-SCALAR-NEXT: bnlr %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrl %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp ule float %f1, %f2
|
%cond = fcmp ule float %f1, %f2
|
||||||
|
@ -296,8 +316,9 @@ define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f19:
|
; CHECK-LABEL: f19:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bnhr %r14
|
; CHECK-SCALAR-NEXT: bnhr %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrh %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp uge float %f1, %f2
|
%cond = fcmp uge float %f1, %f2
|
||||||
|
@ -309,8 +330,9 @@ define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) {
|
define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) {
|
||||||
; CHECK-LABEL: f20:
|
; CHECK-LABEL: f20:
|
||||||
; CHECK: ceb %f0, 0(%r4)
|
; CHECK: ceb %f0, 0(%r4)
|
||||||
; CHECK-NEXT: bnher %r14
|
; CHECK-SCALAR-NEXT: bnher %r14
|
||||||
; CHECK: lgr %r2, %r3
|
; CHECK-SCALAR: lgr %r2, %r3
|
||||||
|
; CHECK-VECTOR-NEXT: locgrhe %r2, %r3
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f1 = load float , float *%ptr
|
%f1 = load float , float *%ptr
|
||||||
%cond = fcmp ugt float %f1, %f2
|
%cond = fcmp ugt float %f1, %f2
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
; Test 32-bit floating-point division.
|
; Test 32-bit floating-point division.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare float @foo()
|
declare float @foo()
|
||||||
|
|
||||||
|
@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) {
|
||||||
define float @f7(float *%ptr0) {
|
define float @f7(float *%ptr0) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: brasl %r14, foo@PLT
|
; CHECK: brasl %r14, foo@PLT
|
||||||
; CHECK: deb %f0, 16{{[04]}}(%r15)
|
; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15)
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
||||||
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
; Test multiplication of two f32s, producing an f32 result.
|
; Test multiplication of two f32s, producing an f32 result.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare float @foo()
|
declare float @foo()
|
||||||
|
|
||||||
|
@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) {
|
||||||
define float @f7(float *%ptr0) {
|
define float @f7(float *%ptr0) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: brasl %r14, foo@PLT
|
; CHECK: brasl %r14, foo@PLT
|
||||||
; CHECK: meeb %f0, 16{{[04]}}(%r15)
|
; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15)
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
||||||
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
||||||
|
|
|
@ -1,11 +1,15 @@
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
|
||||||
|
|
||||||
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
|
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
|
||||||
|
|
||||||
define float @f1(float %f1, float %f2, float %acc) {
|
define float @f1(float %f1, float %f2, float %acc) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
; CHECK: maebr %f4, %f0, %f2
|
; CHECK-SCALAR: maebr %f4, %f0, %f2
|
||||||
; CHECK: ler %f0, %f4
|
; CHECK-SCALAR: ler %f0, %f4
|
||||||
|
; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
|
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
|
||||||
ret float %res
|
ret float %res
|
||||||
|
@ -14,7 +18,8 @@ define float @f1(float %f1, float %f2, float %acc) {
|
||||||
define float @f2(float %f1, float *%ptr, float %acc) {
|
define float @f2(float %f1, float *%ptr, float %acc) {
|
||||||
; CHECK-LABEL: f2:
|
; CHECK-LABEL: f2:
|
||||||
; CHECK: maeb %f2, %f0, 0(%r2)
|
; CHECK: maeb %f2, %f0, 0(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
|
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
|
||||||
|
@ -24,7 +29,8 @@ define float @f2(float %f1, float *%ptr, float %acc) {
|
||||||
define float @f3(float %f1, float *%base, float %acc) {
|
define float @f3(float %f1, float *%base, float %acc) {
|
||||||
; CHECK-LABEL: f3:
|
; CHECK-LABEL: f3:
|
||||||
; CHECK: maeb %f2, %f0, 4092(%r2)
|
; CHECK: maeb %f2, %f0, 4092(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 1023
|
%ptr = getelementptr float, float *%base, i64 1023
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -39,7 +45,8 @@ define float @f4(float %f1, float *%base, float %acc) {
|
||||||
; CHECK-LABEL: f4:
|
; CHECK-LABEL: f4:
|
||||||
; CHECK: aghi %r2, 4096
|
; CHECK: aghi %r2, 4096
|
||||||
; CHECK: maeb %f2, %f0, 0(%r2)
|
; CHECK: maeb %f2, %f0, 0(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 1024
|
%ptr = getelementptr float, float *%base, i64 1024
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -54,7 +61,8 @@ define float @f5(float %f1, float *%base, float %acc) {
|
||||||
; CHECK-LABEL: f5:
|
; CHECK-LABEL: f5:
|
||||||
; CHECK: aghi %r2, -4
|
; CHECK: aghi %r2, -4
|
||||||
; CHECK: maeb %f2, %f0, 0(%r2)
|
; CHECK: maeb %f2, %f0, 0(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 -1
|
%ptr = getelementptr float, float *%base, i64 -1
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -66,7 +74,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) {
|
||||||
; CHECK-LABEL: f6:
|
; CHECK-LABEL: f6:
|
||||||
; CHECK: sllg %r1, %r3, 2
|
; CHECK: sllg %r1, %r3, 2
|
||||||
; CHECK: maeb %f2, %f0, 0(%r1,%r2)
|
; CHECK: maeb %f2, %f0, 0(%r1,%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 %index
|
%ptr = getelementptr float, float *%base, i64 %index
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -78,7 +87,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: sllg %r1, %r3, 2
|
; CHECK: sllg %r1, %r3, 2
|
||||||
; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
|
; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%index2 = add i64 %index, 1023
|
%index2 = add i64 %index, 1023
|
||||||
%ptr = getelementptr float, float *%base, i64 %index2
|
%ptr = getelementptr float, float *%base, i64 %index2
|
||||||
|
@ -92,7 +102,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) {
|
||||||
; CHECK: sllg %r1, %r3, 2
|
; CHECK: sllg %r1, %r3, 2
|
||||||
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
|
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
|
||||||
; CHECK: maeb %f2, %f0, 0(%r1)
|
; CHECK: maeb %f2, %f0, 0(%r1)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%index2 = add i64 %index, 1024
|
%index2 = add i64 %index, 1024
|
||||||
%ptr = getelementptr float, float *%base, i64 %index2
|
%ptr = getelementptr float, float *%base, i64 %index2
|
||||||
|
|
|
@ -1,11 +1,15 @@
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
|
||||||
|
|
||||||
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
|
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
|
||||||
|
|
||||||
define float @f1(float %f1, float %f2, float %acc) {
|
define float @f1(float %f1, float %f2, float %acc) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
; CHECK: msebr %f4, %f0, %f2
|
; CHECK-SCALAR: msebr %f4, %f0, %f2
|
||||||
; CHECK: ler %f0, %f4
|
; CHECK-SCALAR: ler %f0, %f4
|
||||||
|
; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%negacc = fsub float -0.0, %acc
|
%negacc = fsub float -0.0, %acc
|
||||||
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
|
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
|
||||||
|
@ -15,7 +19,8 @@ define float @f1(float %f1, float %f2, float %acc) {
|
||||||
define float @f2(float %f1, float *%ptr, float %acc) {
|
define float @f2(float %f1, float *%ptr, float %acc) {
|
||||||
; CHECK-LABEL: f2:
|
; CHECK-LABEL: f2:
|
||||||
; CHECK: mseb %f2, %f0, 0(%r2)
|
; CHECK: mseb %f2, %f0, 0(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
%negacc = fsub float -0.0, %acc
|
%negacc = fsub float -0.0, %acc
|
||||||
|
@ -26,7 +31,8 @@ define float @f2(float %f1, float *%ptr, float %acc) {
|
||||||
define float @f3(float %f1, float *%base, float %acc) {
|
define float @f3(float %f1, float *%base, float %acc) {
|
||||||
; CHECK-LABEL: f3:
|
; CHECK-LABEL: f3:
|
||||||
; CHECK: mseb %f2, %f0, 4092(%r2)
|
; CHECK: mseb %f2, %f0, 4092(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 1023
|
%ptr = getelementptr float, float *%base, i64 1023
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -42,7 +48,8 @@ define float @f4(float %f1, float *%base, float %acc) {
|
||||||
; CHECK-LABEL: f4:
|
; CHECK-LABEL: f4:
|
||||||
; CHECK: aghi %r2, 4096
|
; CHECK: aghi %r2, 4096
|
||||||
; CHECK: mseb %f2, %f0, 0(%r2)
|
; CHECK: mseb %f2, %f0, 0(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 1024
|
%ptr = getelementptr float, float *%base, i64 1024
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -58,7 +65,8 @@ define float @f5(float %f1, float *%base, float %acc) {
|
||||||
; CHECK-LABEL: f5:
|
; CHECK-LABEL: f5:
|
||||||
; CHECK: aghi %r2, -4
|
; CHECK: aghi %r2, -4
|
||||||
; CHECK: mseb %f2, %f0, 0(%r2)
|
; CHECK: mseb %f2, %f0, 0(%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 -1
|
%ptr = getelementptr float, float *%base, i64 -1
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -71,7 +79,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) {
|
||||||
; CHECK-LABEL: f6:
|
; CHECK-LABEL: f6:
|
||||||
; CHECK: sllg %r1, %r3, 2
|
; CHECK: sllg %r1, %r3, 2
|
||||||
; CHECK: mseb %f2, %f0, 0(%r1,%r2)
|
; CHECK: mseb %f2, %f0, 0(%r1,%r2)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr = getelementptr float, float *%base, i64 %index
|
%ptr = getelementptr float, float *%base, i64 %index
|
||||||
%f2 = load float , float *%ptr
|
%f2 = load float , float *%ptr
|
||||||
|
@ -84,7 +93,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: sllg %r1, %r3, 2
|
; CHECK: sllg %r1, %r3, 2
|
||||||
; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
|
; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%index2 = add i64 %index, 1023
|
%index2 = add i64 %index, 1023
|
||||||
%ptr = getelementptr float, float *%base, i64 %index2
|
%ptr = getelementptr float, float *%base, i64 %index2
|
||||||
|
@ -99,7 +109,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) {
|
||||||
; CHECK: sllg %r1, %r3, 2
|
; CHECK: sllg %r1, %r3, 2
|
||||||
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
|
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
|
||||||
; CHECK: mseb %f2, %f0, 0(%r1)
|
; CHECK: mseb %f2, %f0, 0(%r1)
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK-SCALAR: ler %f0, %f2
|
||||||
|
; CHECK-VECTOR: ldr %f0, %f2
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%index2 = add i64 %index, 1024
|
%index2 = add i64 %index, 1024
|
||||||
%ptr = getelementptr float, float *%base, i64 %index2
|
%ptr = getelementptr float, float *%base, i64 %index2
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
|
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
|
||||||
|
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
|
||||||
|
|
||||||
define double @f1(double %f1, double %f2, double %acc) {
|
define double @f1(double %f1, double %f2, double %acc) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
|
@ -21,3 +22,22 @@ define double @f2(double %f1, double %f2, double %acc) {
|
||||||
ret double %negres
|
ret double %negres
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define float @f3(float %f1, float %f2, float %acc) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: wfnmasb %f0, %f0, %f2, %f4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
|
||||||
|
%negres = fsub float -0.0, %res
|
||||||
|
ret float %negres
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f4(float %f1, float %f2, float %acc) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: wfnmssb %f0, %f0, %f2, %f4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%negacc = fsub float -0.0, %acc
|
||||||
|
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
|
||||||
|
%negres = fsub float -0.0, %res
|
||||||
|
ret float %negres
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
; Test floating-point negation on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test f32.
|
||||||
|
define float @f1(float %f) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: lcdfr %f0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = fsub float -0.0, %f
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test f64.
|
||||||
|
define double @f2(double %f) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: lcdfr %f0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = fsub double -0.0, %f
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test f128. With the loads and stores, a pure negation would probably
|
||||||
|
; be better implemented using an XI on the upper byte. Do some extra
|
||||||
|
; processing so that using FPRs is unequivocally better.
|
||||||
|
define void @f3(fp128 *%ptr, fp128 *%ptr2) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: lcxbr
|
||||||
|
; CHECK: dxbr
|
||||||
|
; CHECK: br %r14
|
||||||
|
%orig = load fp128 , fp128 *%ptr
|
||||||
|
%negzero = fpext float -0.0 to fp128
|
||||||
|
%neg = fsub fp128 0xL00000000000000008000000000000000, %orig
|
||||||
|
%op2 = load fp128 , fp128 *%ptr2
|
||||||
|
%res = fdiv fp128 %neg, %op2
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -0,0 +1,195 @@
|
||||||
|
; Test rounding functions for z14 and above.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test rint for f32.
|
||||||
|
declare float @llvm.rint.f32(float %f)
|
||||||
|
define float @f1(float %f) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: fiebra %f0, 0, %f0, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.rint.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test rint for f64.
|
||||||
|
declare double @llvm.rint.f64(double %f)
|
||||||
|
define double @f2(double %f) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: fidbra %f0, 0, %f0, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.rint.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test rint for f128.
|
||||||
|
declare fp128 @llvm.rint.f128(fp128 %f)
|
||||||
|
define void @f3(fp128 *%ptr) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: fixbr %f0, 0, %f0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%src = load fp128 , fp128 *%ptr
|
||||||
|
%res = call fp128 @llvm.rint.f128(fp128 %src)
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test nearbyint for f32.
|
||||||
|
declare float @llvm.nearbyint.f32(float %f)
|
||||||
|
define float @f4(float %f) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: fiebra %f0, 0, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.nearbyint.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test nearbyint for f64.
|
||||||
|
declare double @llvm.nearbyint.f64(double %f)
|
||||||
|
define double @f5(double %f) {
|
||||||
|
; CHECK-LABEL: f5:
|
||||||
|
; CHECK: fidbra %f0, 0, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.nearbyint.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test nearbyint for f128.
|
||||||
|
declare fp128 @llvm.nearbyint.f128(fp128 %f)
|
||||||
|
define void @f6(fp128 *%ptr) {
|
||||||
|
; CHECK-LABEL: f6:
|
||||||
|
; CHECK: fixbra %f0, 0, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%src = load fp128 , fp128 *%ptr
|
||||||
|
%res = call fp128 @llvm.nearbyint.f128(fp128 %src)
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test floor for f32.
|
||||||
|
declare float @llvm.floor.f32(float %f)
|
||||||
|
define float @f7(float %f) {
|
||||||
|
; CHECK-LABEL: f7:
|
||||||
|
; CHECK: fiebra %f0, 7, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.floor.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test floor for f64.
|
||||||
|
declare double @llvm.floor.f64(double %f)
|
||||||
|
define double @f8(double %f) {
|
||||||
|
; CHECK-LABEL: f8:
|
||||||
|
; CHECK: fidbra %f0, 7, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.floor.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test floor for f128.
|
||||||
|
declare fp128 @llvm.floor.f128(fp128 %f)
|
||||||
|
define void @f9(fp128 *%ptr) {
|
||||||
|
; CHECK-LABEL: f9:
|
||||||
|
; CHECK: fixbra %f0, 7, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%src = load fp128 , fp128 *%ptr
|
||||||
|
%res = call fp128 @llvm.floor.f128(fp128 %src)
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ceil for f32.
|
||||||
|
declare float @llvm.ceil.f32(float %f)
|
||||||
|
define float @f10(float %f) {
|
||||||
|
; CHECK-LABEL: f10:
|
||||||
|
; CHECK: fiebra %f0, 6, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.ceil.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ceil for f64.
|
||||||
|
declare double @llvm.ceil.f64(double %f)
|
||||||
|
define double @f11(double %f) {
|
||||||
|
; CHECK-LABEL: f11:
|
||||||
|
; CHECK: fidbra %f0, 6, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.ceil.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ceil for f128.
|
||||||
|
declare fp128 @llvm.ceil.f128(fp128 %f)
|
||||||
|
define void @f12(fp128 *%ptr) {
|
||||||
|
; CHECK-LABEL: f12:
|
||||||
|
; CHECK: fixbra %f0, 6, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%src = load fp128 , fp128 *%ptr
|
||||||
|
%res = call fp128 @llvm.ceil.f128(fp128 %src)
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test trunc for f32.
|
||||||
|
declare float @llvm.trunc.f32(float %f)
|
||||||
|
define float @f13(float %f) {
|
||||||
|
; CHECK-LABEL: f13:
|
||||||
|
; CHECK: fiebra %f0, 5, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.trunc.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test trunc for f64.
|
||||||
|
declare double @llvm.trunc.f64(double %f)
|
||||||
|
define double @f14(double %f) {
|
||||||
|
; CHECK-LABEL: f14:
|
||||||
|
; CHECK: fidbra %f0, 5, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.trunc.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test trunc for f128.
|
||||||
|
declare fp128 @llvm.trunc.f128(fp128 %f)
|
||||||
|
define void @f15(fp128 *%ptr) {
|
||||||
|
; CHECK-LABEL: f15:
|
||||||
|
; CHECK: fixbra %f0, 5, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%src = load fp128 , fp128 *%ptr
|
||||||
|
%res = call fp128 @llvm.trunc.f128(fp128 %src)
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test round for f32.
|
||||||
|
declare float @llvm.round.f32(float %f)
|
||||||
|
define float @f16(float %f) {
|
||||||
|
; CHECK-LABEL: f16:
|
||||||
|
; CHECK: fiebra %f0, 1, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call float @llvm.round.f32(float %f)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test round for f64.
|
||||||
|
declare double @llvm.round.f64(double %f)
|
||||||
|
define double @f17(double %f) {
|
||||||
|
; CHECK-LABEL: f17:
|
||||||
|
; CHECK: fidbra %f0, 1, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call double @llvm.round.f64(double %f)
|
||||||
|
ret double %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test round for f128.
|
||||||
|
declare fp128 @llvm.round.f128(fp128 %f)
|
||||||
|
define void @f18(fp128 *%ptr) {
|
||||||
|
; CHECK-LABEL: f18:
|
||||||
|
; CHECK: fixbra %f0, 1, %f0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%src = load fp128 , fp128 *%ptr
|
||||||
|
%res = call fp128 @llvm.round.f128(fp128 %src)
|
||||||
|
store fp128 %res, fp128 *%ptr
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -1,6 +1,8 @@
|
||||||
; Test 32-bit square root.
|
; Test 32-bit square root.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare float @llvm.sqrt.f32(float)
|
declare float @llvm.sqrt.f32(float)
|
||||||
declare float @sqrtf(float)
|
declare float @sqrtf(float)
|
||||||
|
@ -77,7 +79,7 @@ define float @f6(float *%base, i64 %index) {
|
||||||
; to use SQEB if possible.
|
; to use SQEB if possible.
|
||||||
define void @f7(float *%ptr) {
|
define void @f7(float *%ptr) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
|
; CHECK-SCALAR: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%val0 = load volatile float , float *%ptr
|
%val0 = load volatile float , float *%ptr
|
||||||
%val1 = load volatile float , float *%ptr
|
%val1 = load volatile float , float *%ptr
|
||||||
|
@ -160,7 +162,7 @@ define float @f8(float %dummy, float %val) {
|
||||||
; CHECK: sqebr %f0, %f2
|
; CHECK: sqebr %f0, %f2
|
||||||
; CHECK: cebr %f0, %f0
|
; CHECK: cebr %f0, %f0
|
||||||
; CHECK: bnor %r14
|
; CHECK: bnor %r14
|
||||||
; CHECK: ler %f0, %f2
|
; CHECK: {{ler|ldr}} %f0, %f2
|
||||||
; CHECK: jg sqrtf@PLT
|
; CHECK: jg sqrtf@PLT
|
||||||
%res = tail call float @sqrtf(float %val)
|
%res = tail call float @sqrtf(float %val)
|
||||||
ret float %res
|
ret float %res
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
; Test 32-bit floating-point subtraction.
|
; Test 32-bit floating-point subtraction.
|
||||||
;
|
;
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
|
||||||
|
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare float @foo()
|
declare float @foo()
|
||||||
|
|
||||||
|
@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) {
|
||||||
define float @f7(float *%ptr0) {
|
define float @f7(float *%ptr0) {
|
||||||
; CHECK-LABEL: f7:
|
; CHECK-LABEL: f7:
|
||||||
; CHECK: brasl %r14, foo@PLT
|
; CHECK: brasl %r14, foo@PLT
|
||||||
; CHECK: seb %f0, 16{{[04]}}(%r15)
|
; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15)
|
||||||
; CHECK: br %r14
|
; CHECK: br %r14
|
||||||
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
%ptr1 = getelementptr float, float *%ptr0, i64 2
|
||||||
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
%ptr2 = getelementptr float, float *%ptr0, i64 4
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
; Test f32 and v4f32 absolute on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
declare float @llvm.fabs.f32(float)
|
||||||
|
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
|
||||||
|
|
||||||
|
; Test a plain absolute.
|
||||||
|
define <4 x float> @f1(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vflpsb %v24, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call <4 x float> @llvm.fabs.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a negative absolute.
|
||||||
|
define <4 x float> @f2(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: vflnsb %v24, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %val)
|
||||||
|
%ret = fsub <4 x float> <float -0.0, float -0.0,
|
||||||
|
float -0.0, float -0.0>, %abs
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 absolute that uses vector registers.
|
||||||
|
define float @f3(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: wflpsb %f0, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%ret = call float @llvm.fabs.f32(float %scalar)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 negative absolute that uses vector registers.
|
||||||
|
define float @f4(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: wflnsb %f0, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%abs = call float @llvm.fabs.f32(float %scalar)
|
||||||
|
%ret = fsub float -0.0, %abs
|
||||||
|
ret float %ret
|
||||||
|
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
; Test vector addition on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test a v4f32 addition.
|
||||||
|
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfasb %v24, %v26, %v28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = fadd <4 x float> %val1, %val2
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 addition that uses vector registers.
|
||||||
|
define float @f2(<4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: wfasb %f0, %v24, %v26
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar1 = extractelement <4 x float> %val1, i32 0
|
||||||
|
%scalar2 = extractelement <4 x float> %val2, i32 0
|
||||||
|
%ret = fadd float %scalar1, %scalar2
|
||||||
|
ret float %ret
|
||||||
|
}
|
|
@ -0,0 +1,349 @@
|
||||||
|
; Test f32 and v4f32 comparisons on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test oeq.
|
||||||
|
define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfcesb %v24, %v26, %v28
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp oeq <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test one.
|
||||||
|
define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
|
||||||
|
; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK: vo %v24, [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp one <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ogt.
|
||||||
|
define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: vfchsb %v24, %v26, %v28
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ogt <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test oge.
|
||||||
|
define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: vfchesb %v24, %v26, %v28
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp oge <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ole.
|
||||||
|
define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f5:
|
||||||
|
; CHECK: vfchesb %v24, %v28, %v26
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ole <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test olt.
|
||||||
|
define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f6:
|
||||||
|
; CHECK: vfchsb %v24, %v28, %v26
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp olt <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ueq.
|
||||||
|
define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f7:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
|
||||||
|
; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK: vno %v24, [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ueq <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test une.
|
||||||
|
define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f8:
|
||||||
|
; CHECK: vfcesb [[REG:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp une <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ugt.
|
||||||
|
define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f9:
|
||||||
|
; CHECK: vfchesb [[REG:%v[0-9]+]], %v28, %v26
|
||||||
|
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ugt <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test uge.
|
||||||
|
define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f10:
|
||||||
|
; CHECK: vfchsb [[REG:%v[0-9]+]], %v28, %v26
|
||||||
|
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp uge <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ule.
|
||||||
|
define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f11:
|
||||||
|
; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ule <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ult.
|
||||||
|
define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f12:
|
||||||
|
; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ult <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ord.
|
||||||
|
define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f13:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
|
||||||
|
; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK: vo %v24, [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ord <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test uno.
|
||||||
|
define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f14:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
|
||||||
|
; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28
|
||||||
|
; CHECK: vno %v24, [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp uno <4 x float> %val1, %val2
|
||||||
|
%ret = sext <4 x i1> %cmp to <4 x i32>
|
||||||
|
ret <4 x i32> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test oeq selects.
|
||||||
|
define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f15:
|
||||||
|
; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp oeq <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test one selects.
|
||||||
|
define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f16:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp one <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ogt selects.
|
||||||
|
define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f17:
|
||||||
|
; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ogt <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test oge selects.
|
||||||
|
define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f18:
|
||||||
|
; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp oge <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ole selects.
|
||||||
|
define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f19:
|
||||||
|
; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ole <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test olt selects.
|
||||||
|
define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f20:
|
||||||
|
; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp olt <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ueq selects.
|
||||||
|
define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f21:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ueq <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test une selects.
|
||||||
|
define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f22:
|
||||||
|
; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp une <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ugt selects.
|
||||||
|
define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f23:
|
||||||
|
; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ugt <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test uge selects.
|
||||||
|
define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f24:
|
||||||
|
; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp uge <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ule selects.
|
||||||
|
define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f25:
|
||||||
|
; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ule <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ult selects.
|
||||||
|
define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f26:
|
||||||
|
; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ult <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test ord selects.
|
||||||
|
define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f27:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp ord <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test uno selects.
|
||||||
|
define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
|
||||||
|
<4 x float> %val3, <4 x float> %val4) {
|
||||||
|
; CHECK-LABEL: f28:
|
||||||
|
; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
|
||||||
|
; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26
|
||||||
|
; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
|
||||||
|
; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
%cmp = fcmp uno <4 x float> %val1, %val2
|
||||||
|
%ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 comparison that uses vector registers.
|
||||||
|
define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) {
|
||||||
|
; CHECK-LABEL: f29:
|
||||||
|
; CHECK: wfcsb %f0, %v24
|
||||||
|
; CHECK-NEXT: locgrne %r2, %r3
|
||||||
|
; CHECK: br %r14
|
||||||
|
%f2 = extractelement <4 x float> %vec, i32 0
|
||||||
|
%cond = fcmp oeq float %f1, %f2
|
||||||
|
%res = select i1 %cond, i64 %a, i64 %b
|
||||||
|
ret i64 %res
|
||||||
|
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
; Test vector division on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test a v4f32 division.
|
||||||
|
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfdsb %v24, %v26, %v28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = fdiv <4 x float> %val1, %val2
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 division that uses vector registers.
|
||||||
|
define float @f2(<4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: wfdsb %f0, %v24, %v26
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar1 = extractelement <4 x float> %val1, i32 0
|
||||||
|
%scalar2 = extractelement <4 x float> %val2, i32 0
|
||||||
|
%ret = fdiv float %scalar1, %scalar2
|
||||||
|
ret float %ret
|
||||||
|
}
|
|
@ -6,8 +6,17 @@ declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
|
||||||
declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
|
declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
|
||||||
declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
|
declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
|
||||||
declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
|
declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
|
||||||
|
|
||||||
|
declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
|
||||||
|
declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
|
||||||
|
declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
|
||||||
|
declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
|
||||||
|
declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)
|
||||||
|
|
||||||
declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
|
declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
|
||||||
declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
|
declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
|
||||||
|
declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
|
||||||
|
declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)
|
||||||
|
|
||||||
; VBPERM.
|
; VBPERM.
|
||||||
define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
|
define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
|
||||||
|
@ -192,6 +201,208 @@ define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; VFCESBS with no processing of the result.
|
||||||
|
define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfcesbs:
|
||||||
|
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
|
||||||
|
; CHECK: ipm %r2
|
||||||
|
; CHECK: srl %r2, 28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
ret i32 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCESBS, returning 1 if any elements are equal (CC != 3).
|
||||||
|
define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfcesbs_any_bool:
|
||||||
|
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
|
||||||
|
; CHECK: ipm %r2
|
||||||
|
; CHECK: afi %r2, -536870912
|
||||||
|
; CHECK: srl %r2, 31
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp ne i32 %res, 3
|
||||||
|
%ext = zext i1 %cmp to i32
|
||||||
|
ret i32 %ext
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCESBS, storing to %ptr if any elements are equal.
|
||||||
|
define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b,
|
||||||
|
i32 *%ptr) {
|
||||||
|
; CHECK-LABEL: test_vfcesbs_any_store:
|
||||||
|
; CHECK-NOT: %r
|
||||||
|
; CHECK: vfcesbs %v24, %v24, %v26
|
||||||
|
; CHECK-NEXT: {{bor|bnler}} %r14
|
||||||
|
; CHECK: mvhi 0(%r2), 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 0
|
||||||
|
%cc = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp ule i32 %cc, 2
|
||||||
|
br i1 %cmp, label %store, label %exit
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i32 0, i32 *%ptr
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret <4 x i32> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCHSBS with no processing of the result.
|
||||||
|
define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfchsbs:
|
||||||
|
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
|
||||||
|
; CHECK: ipm %r2
|
||||||
|
; CHECK: srl %r2, 28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
ret i32 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCHSBS, returning 1 if not all elements are higher.
|
||||||
|
define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfchsbs_notall_bool:
|
||||||
|
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
|
||||||
|
; CHECK: ipm [[REG:%r[0-5]]]
|
||||||
|
; CHECK: risblg %r2, [[REG]], 31, 159, 36
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp sge i32 %res, 1
|
||||||
|
%ext = zext i1 %cmp to i32
|
||||||
|
ret i32 %ext
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCHSBS, storing to %ptr if not all elements are higher.
|
||||||
|
define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b,
|
||||||
|
i32 *%ptr) {
|
||||||
|
; CHECK-LABEL: test_vfchsbs_notall_store:
|
||||||
|
; CHECK-NOT: %r
|
||||||
|
; CHECK: vfchsbs %v24, %v24, %v26
|
||||||
|
; CHECK-NEXT: {{bher|ber}} %r14
|
||||||
|
; CHECK: mvhi 0(%r2), 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 0
|
||||||
|
%cc = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp ugt i32 %cc, 0
|
||||||
|
br i1 %cmp, label %store, label %exit
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i32 0, i32 *%ptr
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret <4 x i32> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCHESBS with no processing of the result.
|
||||||
|
define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfchesbs:
|
||||||
|
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
|
||||||
|
; CHECK: ipm %r2
|
||||||
|
; CHECK: srl %r2, 28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
ret i32 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCHESBS, returning 1 if no elements are higher or equal.
|
||||||
|
define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfchesbs_none_bool:
|
||||||
|
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
|
||||||
|
; CHECK: ipm [[REG:%r[0-5]]]
|
||||||
|
; CHECK: risblg %r2, [[REG]], 31, 159, 35
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp eq i32 %res, 3
|
||||||
|
%ext = zext i1 %cmp to i32
|
||||||
|
ret i32 %ext
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFCHESBS, storing to %ptr if no elements are higher or equal.
|
||||||
|
define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b,
|
||||||
|
i32 *%ptr) {
|
||||||
|
; CHECK-LABEL: test_vfchesbs_none_store:
|
||||||
|
; CHECK-NOT: %r
|
||||||
|
; CHECK: vfchesbs %v24, %v24, %v26
|
||||||
|
; CHECK-NEXT: {{bnor|bler}} %r14
|
||||||
|
; CHECK: mvhi 0(%r2), 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
|
||||||
|
<4 x float> %b)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 0
|
||||||
|
%cc = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp uge i32 %cc, 3
|
||||||
|
br i1 %cmp, label %store, label %exit
|
||||||
|
|
||||||
|
store:
|
||||||
|
store i32 0, i32 *%ptr
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret <4 x i32> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFTCISB with the lowest useful class selector and no processing of the result.
|
||||||
|
define i32 @test_vftcisb(<4 x float> %a) {
|
||||||
|
; CHECK-LABEL: test_vftcisb:
|
||||||
|
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
|
||||||
|
; CHECK: ipm %r2
|
||||||
|
; CHECK: srl %r2, 28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
ret i32 %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFTCISB with the highest useful class selector, returning 1 if all elements
|
||||||
|
; have the right class (CC == 0).
|
||||||
|
define i32 @test_vftcisb_all_bool(<4 x float> %a) {
|
||||||
|
; CHECK-LABEL: test_vftcisb_all_bool:
|
||||||
|
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
|
||||||
|
; CHECK: afi %r2, -268435456
|
||||||
|
; CHECK: srl %r2, 31
|
||||||
|
; CHECK: br %r14
|
||||||
|
%call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
|
||||||
|
%res = extractvalue {<4 x i32>, i32} %call, 1
|
||||||
|
%cmp = icmp eq i32 %res, 0
|
||||||
|
%ext = zext i1 %cmp to i32
|
||||||
|
ret i32 %ext
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFISB with a rounding mode not usable via standard intrinsics.
|
||||||
|
define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
|
||||||
|
; CHECK-LABEL: test_vfisb_0_4:
|
||||||
|
; CHECK: vfisb %v24, %v24, 0, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFISB with IEEE-inexact exception suppressed.
|
||||||
|
define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
|
||||||
|
; CHECK-LABEL: test_vfisb_4_0:
|
||||||
|
; CHECK: vfisb %v24, %v24, 4, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
; VFMAXDB.
|
; VFMAXDB.
|
||||||
define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
|
define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
|
||||||
; CHECK-LABEL: test_vfmaxdb:
|
; CHECK-LABEL: test_vfmaxdb:
|
||||||
|
@ -210,3 +421,21 @@ define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; VFMAXSB.
|
||||||
|
define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfmaxsb:
|
||||||
|
; CHECK: vfmaxsb %v24, %v24, %v26, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
; VFMINSB.
|
||||||
|
define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
|
||||||
|
; CHECK-LABEL: test_vfminsb:
|
||||||
|
; CHECK: vfminsb %v24, %v24, %v26, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,10 @@ declare double @fmax(double, double)
|
||||||
declare double @llvm.maxnum.f64(double, double)
|
declare double @llvm.maxnum.f64(double, double)
|
||||||
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
|
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
|
||||||
|
|
||||||
|
declare float @fmaxf(float, float)
|
||||||
|
declare float @llvm.maxnum.f32(float, float)
|
||||||
|
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
|
||||||
|
|
||||||
; Test the fmax library function.
|
; Test the fmax library function.
|
||||||
define double @f1(double %dummy, double %val1, double %val2) {
|
define double @f1(double %dummy, double %val1, double %val2) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
|
@ -56,3 +60,53 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
|
||||||
ret <2 x double> %ret
|
ret <2 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Test the fmaxf library function.
|
||||||
|
define float @f11(float %dummy, float %val1, float %val2) {
|
||||||
|
; CHECK-LABEL: f11:
|
||||||
|
; CHECK: wfmaxsb %f0, %f2, %f4, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call float @fmaxf(float %val1, float %val2) readnone
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test the f32 maxnum intrinsic.
|
||||||
|
define float @f12(float %dummy, float %val1, float %val2) {
|
||||||
|
; CHECK-LABEL: f12:
|
||||||
|
; CHECK: wfmaxsb %f0, %f2, %f4, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call float @llvm.maxnum.f32(float %val1, float %val2)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a f32 constant compare/select resulting in maxnum.
|
||||||
|
define float @f13(float %dummy, float %val) {
|
||||||
|
; CHECK-LABEL: f13:
|
||||||
|
; CHECK: lzer [[REG:%f[0-9]+]]
|
||||||
|
; CHECK: wfmaxsb %f0, %f2, [[REG]], 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%cmp = fcmp ogt float %val, 0.0
|
||||||
|
%ret = select i1 %cmp, float %val, float 0.0
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a f32 constant compare/select resulting in maxnan.
|
||||||
|
define float @f14(float %dummy, float %val) {
|
||||||
|
; CHECK-LABEL: f14:
|
||||||
|
; CHECK: lzer [[REG:%f[0-9]+]]
|
||||||
|
; CHECK: wfmaxsb %f0, %f2, [[REG]], 1
|
||||||
|
; CHECK: br %r14
|
||||||
|
%cmp = fcmp ugt float %val, 0.0
|
||||||
|
%ret = select i1 %cmp, float %val, float 0.0
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test the v4f32 maxnum intrinsic.
|
||||||
|
define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f15:
|
||||||
|
; CHECK: vfmaxsb %v24, %v26, %v28, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %val1, <4 x float> %val2)
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,10 @@ declare double @fmin(double, double)
|
||||||
declare double @llvm.minnum.f64(double, double)
|
declare double @llvm.minnum.f64(double, double)
|
||||||
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
|
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
|
||||||
|
|
||||||
|
declare float @fminf(float, float)
|
||||||
|
declare float @llvm.minnum.f32(float, float)
|
||||||
|
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
|
||||||
|
|
||||||
; Test the fmin library function.
|
; Test the fmin library function.
|
||||||
define double @f1(double %dummy, double %val1, double %val2) {
|
define double @f1(double %dummy, double %val1, double %val2) {
|
||||||
; CHECK-LABEL: f1:
|
; CHECK-LABEL: f1:
|
||||||
|
@ -56,3 +60,53 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
|
||||||
ret <2 x double> %ret
|
ret <2 x double> %ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Test the fminf library function.
|
||||||
|
define float @f11(float %dummy, float %val1, float %val2) {
|
||||||
|
; CHECK-LABEL: f11:
|
||||||
|
; CHECK: wfminsb %f0, %f2, %f4, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call float @fminf(float %val1, float %val2) readnone
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test the f32 minnum intrinsic.
|
||||||
|
define float @f12(float %dummy, float %val1, float %val2) {
|
||||||
|
; CHECK-LABEL: f12:
|
||||||
|
; CHECK: wfminsb %f0, %f2, %f4, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call float @llvm.minnum.f32(float %val1, float %val2)
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a f32 constant compare/select resulting in minnum.
|
||||||
|
define float @f13(float %dummy, float %val) {
|
||||||
|
; CHECK-LABEL: f13:
|
||||||
|
; CHECK: lzer [[REG:%f[0-9]+]]
|
||||||
|
; CHECK: wfminsb %f0, %f2, [[REG]], 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%cmp = fcmp olt float %val, 0.0
|
||||||
|
%ret = select i1 %cmp, float %val, float 0.0
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a f32 constant compare/select resulting in minnan.
|
||||||
|
define float @f14(float %dummy, float %val) {
|
||||||
|
; CHECK-LABEL: f14:
|
||||||
|
; CHECK: lzer [[REG:%f[0-9]+]]
|
||||||
|
; CHECK: wfminsb %f0, %f2, [[REG]], 1
|
||||||
|
; CHECK: br %r14
|
||||||
|
%cmp = fcmp ult float %val, 0.0
|
||||||
|
%ret = select i1 %cmp, float %val, float 0.0
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test the v4f32 minnum intrinsic.
|
||||||
|
define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f15:
|
||||||
|
; CHECK: vfminsb %v24, %v26, %v28, 4
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call <4 x float> @llvm.minnum.v4f32(<4 x float> %val1, <4 x float> %val2)
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
; Test vector multiplication on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test a v4f32 multiplication.
|
||||||
|
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfmsb %v24, %v26, %v28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = fmul <4 x float> %val1, %val2
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 multiplication that uses vector registers.
|
||||||
|
define float @f2(<4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: wfmsb %f0, %v24, %v26
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar1 = extractelement <4 x float> %val1, i32 0
|
||||||
|
%scalar2 = extractelement <4 x float> %val2, i32 0
|
||||||
|
%ret = fmul float %scalar1, %scalar2
|
||||||
|
ret float %ret
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
; Test vector multiply-and-add on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
||||||
|
|
||||||
|
; Test a v4f32 multiply-and-add.
|
||||||
|
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2, <4 x float> %val3) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfmasb %v24, %v26, %v28, %v30
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1,
|
||||||
|
<4 x float> %val2,
|
||||||
|
<4 x float> %val3)
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a v4f32 multiply-and-subtract.
|
||||||
|
define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2, <4 x float> %val3) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: vfmssb %v24, %v26, %v28, %v30
|
||||||
|
; CHECK: br %r14
|
||||||
|
%negval3 = fsub <4 x float> <float -0.0, float -0.0,
|
||||||
|
float -0.0, float -0.0>, %val3
|
||||||
|
%ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1,
|
||||||
|
<4 x float> %val2,
|
||||||
|
<4 x float> %negval3)
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
|
@ -3,6 +3,7 @@
|
||||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
||||||
|
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
||||||
|
|
||||||
; Test a v2f64 negative multiply-and-add.
|
; Test a v2f64 negative multiply-and-add.
|
||||||
define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
|
define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
|
||||||
|
@ -30,3 +31,33 @@ define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
|
||||||
%negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
|
%negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
|
||||||
ret <2 x double> %negret
|
ret <2 x double> %negret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Test a v4f32 negative multiply-and-add.
|
||||||
|
define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2, <4 x float> %val3) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: vfnmasb %v24, %v26, %v28, %v30
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1,
|
||||||
|
<4 x float> %val2,
|
||||||
|
<4 x float> %val3)
|
||||||
|
%negret = fsub <4 x float> <float -0.0, float -0.0,
|
||||||
|
float -0.0, float -0.0>, %ret
|
||||||
|
ret <4 x float> %negret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a v4f32 negative multiply-and-subtract.
|
||||||
|
define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2, <4 x float> %val3) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: vfnmssb %v24, %v26, %v28, %v30
|
||||||
|
; CHECK: br %r14
|
||||||
|
%negval3 = fsub <4 x float> <float -0.0, float -0.0,
|
||||||
|
float -0.0, float -0.0>, %val3
|
||||||
|
%ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1,
|
||||||
|
<4 x float> %val2,
|
||||||
|
<4 x float> %negval3)
|
||||||
|
%negret = fsub <4 x float> <float -0.0, float -0.0,
|
||||||
|
float -0.0, float -0.0>, %ret
|
||||||
|
ret <4 x float> %negret
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
; Test vector negation on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test a v4f32 negation.
|
||||||
|
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vflcsb %v24, %v26
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = fsub <4 x float> <float -0.0, float -0.0,
|
||||||
|
float -0.0, float -0.0>, %val
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 negation that uses vector registers.
|
||||||
|
define float @f2(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: wflcsb %f0, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%ret = fsub float -0.0, %scalar
|
||||||
|
ret float %ret
|
||||||
|
}
|
|
@ -0,0 +1,118 @@
|
||||||
|
; Test v4f32 rounding on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
declare float @llvm.rint.f32(float)
|
||||||
|
declare float @llvm.nearbyint.f32(float)
|
||||||
|
declare float @llvm.floor.f32(float)
|
||||||
|
declare float @llvm.ceil.f32(float)
|
||||||
|
declare float @llvm.trunc.f32(float)
|
||||||
|
declare float @llvm.round.f32(float)
|
||||||
|
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
|
||||||
|
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
|
||||||
|
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
|
||||||
|
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
|
||||||
|
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
|
||||||
|
declare <4 x float> @llvm.round.v4f32(<4 x float>)
|
||||||
|
|
||||||
|
define <4 x float> @f1(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfisb %v24, %v24, 0, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.rint.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @f2(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: vfisb %v24, %v24, 4, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @f3(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f3:
|
||||||
|
; CHECK: vfisb %v24, %v24, 4, 7
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.floor.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @f4(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f4:
|
||||||
|
; CHECK: vfisb %v24, %v24, 4, 6
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @f5(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f5:
|
||||||
|
; CHECK: vfisb %v24, %v24, 4, 5
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @f6(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f6:
|
||||||
|
; CHECK: vfisb %v24, %v24, 4, 1
|
||||||
|
; CHECK: br %r14
|
||||||
|
%res = call <4 x float> @llvm.round.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f7(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f7:
|
||||||
|
; CHECK: wfisb %f0, %v24, 0, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%res = call float @llvm.rint.f32(float %scalar)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f8(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f8:
|
||||||
|
; CHECK: wfisb %f0, %v24, 4, 0
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%res = call float @llvm.nearbyint.f32(float %scalar)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f9(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f9:
|
||||||
|
; CHECK: wfisb %f0, %v24, 4, 7
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%res = call float @llvm.floor.f32(float %scalar)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f10(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f10:
|
||||||
|
; CHECK: wfisb %f0, %v24, 4, 6
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%res = call float @llvm.ceil.f32(float %scalar)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f11(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f11:
|
||||||
|
; CHECK: wfisb %f0, %v24, 4, 5
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%res = call float @llvm.trunc.f32(float %scalar)
|
||||||
|
ret float %res
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f12(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f12:
|
||||||
|
; CHECK: wfisb %f0, %v24, 4, 1
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%res = call float @llvm.round.f32(float %scalar)
|
||||||
|
ret float %res
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
; Test f32 and v4f32 square root on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
declare float @llvm.sqrt.f32(float)
|
||||||
|
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||||
|
|
||||||
|
define <4 x float> @f1(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f1:
|
||||||
|
; CHECK: vfsqsb %v24, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %val)
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @f2(<4 x float> %val) {
|
||||||
|
; CHECK-LABEL: f2:
|
||||||
|
; CHECK: wfsqsb %f0, %v24
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar = extractelement <4 x float> %val, i32 0
|
||||||
|
%ret = call float @llvm.sqrt.f32(float %scalar)
|
||||||
|
ret float %ret
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
; Test vector subtraction on z14.
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||||
|
|
||||||
|
; Test a v4f32 subtraction.
|
||||||
|
define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1,
|
||||||
|
<4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f6:
|
||||||
|
; CHECK: vfssb %v24, %v26, %v28
|
||||||
|
; CHECK: br %r14
|
||||||
|
%ret = fsub <4 x float> %val1, %val2
|
||||||
|
ret <4 x float> %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test an f32 subtraction that uses vector registers.
|
||||||
|
define float @f7(<4 x float> %val1, <4 x float> %val2) {
|
||||||
|
; CHECK-LABEL: f7:
|
||||||
|
; CHECK: wfssb %f0, %v24, %v26
|
||||||
|
; CHECK: br %r14
|
||||||
|
%scalar1 = extractelement <4 x float> %val1, i32 0
|
||||||
|
%scalar2 = extractelement <4 x float> %val2, i32 0
|
||||||
|
%ret = fsub float %scalar1, %scalar2
|
||||||
|
ret float %ret
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test a v2f32 subtraction, which gets promoted to v4f32.
|
||||||
|
define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) {
|
||||||
|
; No particular output expected, but must compile.
|
||||||
|
%ret = fsub <2 x float> %val1, %val2
|
||||||
|
ret <2 x float> %ret
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -955,6 +955,40 @@
|
||||||
vfaezhs %v0, %v0
|
vfaezhs %v0, %v0
|
||||||
vfaezhs %v0, %v0, %v0, 0, 0
|
vfaezhs %v0, %v0, %v0, 0, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfasb %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfasb %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfcesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfcesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfcesb %v0, %v0, %v0
|
||||||
|
vfcesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfchsb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfchsbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfchsb %v0, %v0, %v0
|
||||||
|
vfchsbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfchesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfchesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfchesb %v0, %v0, %v0
|
||||||
|
vfchesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfdsb %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfdsb %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vfee %v0, %v0, %v0, 0, -1
|
#CHECK: vfee %v0, %v0, %v0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -1257,61 +1291,151 @@
|
||||||
vfidb %v0, %v0, -1, 0
|
vfidb %v0, %v0, -1, 0
|
||||||
vfidb %v0, %v0, 16, 0
|
vfidb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfisb %v0, %v0, 0, 0
|
||||||
|
|
||||||
|
vfisb %v0, %v0, 0, 0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfkedb %v0, %v0, %v0
|
#CHECK: vfkedb %v0, %v0, %v0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfkedbs %v0, %v0, %v0
|
#CHECK: vfkedbs %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfkesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfkesbs %v0, %v0, %v0
|
||||||
|
|
||||||
vfkedb %v0, %v0, %v0
|
vfkedb %v0, %v0, %v0
|
||||||
vfkedbs %v0, %v0, %v0
|
vfkedbs %v0, %v0, %v0
|
||||||
|
vfkesb %v0, %v0, %v0
|
||||||
|
vfkesbs %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfkhdb %v0, %v0, %v0
|
#CHECK: vfkhdb %v0, %v0, %v0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfkhdbs %v0, %v0, %v0
|
#CHECK: vfkhdbs %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfkhsb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfkhsbs %v0, %v0, %v0
|
||||||
|
|
||||||
vfkhdb %v0, %v0, %v0
|
vfkhdb %v0, %v0, %v0
|
||||||
vfkhdbs %v0, %v0, %v0
|
vfkhdbs %v0, %v0, %v0
|
||||||
|
vfkhsb %v0, %v0, %v0
|
||||||
|
vfkhsbs %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfkhedb %v0, %v0, %v0
|
#CHECK: vfkhedb %v0, %v0, %v0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfkhedbs %v0, %v0, %v0
|
#CHECK: vfkhedbs %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfkhesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfkhesbs %v0, %v0, %v0
|
||||||
|
|
||||||
vfkhedb %v0, %v0, %v0
|
vfkhedb %v0, %v0, %v0
|
||||||
vfkhedbs %v0, %v0, %v0
|
vfkhedbs %v0, %v0, %v0
|
||||||
|
vfkhesb %v0, %v0, %v0
|
||||||
|
vfkhesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfpsosb %v0, %v0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vflcsb %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vflnsb %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vflpsb %v0, %v0
|
||||||
|
|
||||||
|
vfpsosb %v0, %v0, 0
|
||||||
|
vflcsb %v0, %v0
|
||||||
|
vflnsb %v0, %v0
|
||||||
|
vflpsb %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfll %v0, %v0, 0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vflls %v0, %v0
|
||||||
|
|
||||||
|
vfll %v0, %v0, 0, 0
|
||||||
|
vflls %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vflr %v0, %v0, 0, 0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vflrd %v0, %v0, 0, 0
|
||||||
|
|
||||||
|
vflr %v0, %v0, 0, 0, 0
|
||||||
|
vflrd %v0, %v0, 0, 0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfmax %v0, %v0, %v0, 0, 0, 0
|
#CHECK: vfmax %v0, %v0, %v0, 0, 0, 0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfmaxdb %v0, %v0, %v0, 0
|
#CHECK: vfmaxdb %v0, %v0, %v0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfmaxsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
vfmax %v0, %v0, %v0, 0, 0, 0
|
vfmax %v0, %v0, %v0, 0, 0, 0
|
||||||
vfmaxdb %v0, %v0, %v0, 0
|
vfmaxdb %v0, %v0, %v0, 0
|
||||||
|
vfmaxsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfmin %v0, %v0, %v0, 0, 0, 0
|
#CHECK: vfmin %v0, %v0, %v0, 0, 0, 0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfmindb %v0, %v0, %v0, 0
|
#CHECK: vfmindb %v0, %v0, %v0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfminsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
vfmin %v0, %v0, %v0, 0, 0, 0
|
vfmin %v0, %v0, %v0, 0, 0, 0
|
||||||
vfmindb %v0, %v0, %v0, 0
|
vfmindb %v0, %v0, %v0, 0
|
||||||
|
vfminsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfmsb %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfmsb %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfnma %v0, %v0, %v0, %v0, 0, 0
|
#CHECK: vfnma %v0, %v0, %v0, %v0, 0, 0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfnmadb %v0, %v0, %v0, %v0
|
#CHECK: vfnmadb %v0, %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfnmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
vfnma %v0, %v0, %v0, %v0, 0, 0
|
vfnma %v0, %v0, %v0, %v0, 0, 0
|
||||||
vfnmadb %v0, %v0, %v0, %v0
|
vfnmadb %v0, %v0, %v0, %v0
|
||||||
|
vfnmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfnms %v0, %v0, %v0, %v0, 0, 0
|
#CHECK: vfnms %v0, %v0, %v0, %v0, 0, 0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: vfnmsdb %v0, %v0, %v0, %v0
|
#CHECK: vfnmsdb %v0, %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfnmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
vfnms %v0, %v0, %v0, %v0, 0, 0
|
vfnms %v0, %v0, %v0, %v0, 0, 0
|
||||||
vfnmsdb %v0, %v0, %v0, %v0
|
vfnmsdb %v0, %v0, %v0, %v0
|
||||||
|
vfnmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfssb %v0, %v0, %v0
|
||||||
|
|
||||||
|
vfssb %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vfsqsb %v0, %v0
|
||||||
|
|
||||||
|
vfsqsb %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vftci %v0, %v0, 0, 0, -1
|
#CHECK: vftci %v0, %v0, 0, 0, -1
|
||||||
|
@ -1341,6 +1465,11 @@
|
||||||
vftcidb %v0, %v0, -1
|
vftcidb %v0, %v0, -1
|
||||||
vftcidb %v0, %v0, 4096
|
vftcidb %v0, %v0, 4096
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: vftcisb %v0, %v0, 0
|
||||||
|
|
||||||
|
vftcisb %v0, %v0, 0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vgbm %v0, -1
|
#CHECK: vgbm %v0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -2607,6 +2736,45 @@
|
||||||
wclgdb %v0, %v0, -1, 0
|
wclgdb %v0, %v0, -1, 0
|
||||||
wclgdb %v0, %v0, 16, 0
|
wclgdb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfasb %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfasb %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfcsb %v0, %v0
|
||||||
|
|
||||||
|
wfcsb %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfcesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfcesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfcesb %v0, %v0, %v0
|
||||||
|
wfcesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfchsb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfchsbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfchsb %v0, %v0, %v0
|
||||||
|
wfchsbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfchesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfchesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfchesb %v0, %v0, %v0
|
||||||
|
wfchesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfdsb %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfdsb %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: wfidb %v0, %v0, 0, -1
|
#CHECK: wfidb %v0, %v0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -2621,49 +2789,138 @@
|
||||||
wfidb %v0, %v0, -1, 0
|
wfidb %v0, %v0, -1, 0
|
||||||
wfidb %v0, %v0, 16, 0
|
wfidb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfisb %v0, %v0, 0, 0
|
||||||
|
|
||||||
|
wfisb %v0, %v0, 0, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfksb %v0, %v0
|
||||||
|
|
||||||
|
wfksb %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfkedb %v0, %v0, %v0
|
#CHECK: wfkedb %v0, %v0, %v0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfkedbs %v0, %v0, %v0
|
#CHECK: wfkedbs %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfkesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfkesbs %v0, %v0, %v0
|
||||||
|
|
||||||
wfkedb %v0, %v0, %v0
|
wfkedb %v0, %v0, %v0
|
||||||
wfkedbs %v0, %v0, %v0
|
wfkedbs %v0, %v0, %v0
|
||||||
|
wfkesb %v0, %v0, %v0
|
||||||
|
wfkesbs %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfkhdb %v0, %v0, %v0
|
#CHECK: wfkhdb %v0, %v0, %v0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfkhdbs %v0, %v0, %v0
|
#CHECK: wfkhdbs %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfkhsb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfkhsbs %v0, %v0, %v0
|
||||||
|
|
||||||
wfkhdb %v0, %v0, %v0
|
wfkhdb %v0, %v0, %v0
|
||||||
wfkhdbs %v0, %v0, %v0
|
wfkhdbs %v0, %v0, %v0
|
||||||
|
wfkhsb %v0, %v0, %v0
|
||||||
|
wfkhsbs %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfkhedb %v0, %v0, %v0
|
#CHECK: wfkhedb %v0, %v0, %v0
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfkhedbs %v0, %v0, %v0
|
#CHECK: wfkhedbs %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfkhesb %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfkhesbs %v0, %v0, %v0
|
||||||
|
|
||||||
wfkhedb %v0, %v0, %v0
|
wfkhedb %v0, %v0, %v0
|
||||||
wfkhedbs %v0, %v0, %v0
|
wfkhedbs %v0, %v0, %v0
|
||||||
|
wfkhesb %v0, %v0, %v0
|
||||||
|
wfkhesbs %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfpsosb %v0, %v0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wflcsb %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wflnsb %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wflpsb %v0, %v0
|
||||||
|
|
||||||
|
wfpsosb %v0, %v0, 0
|
||||||
|
wflcsb %v0, %v0
|
||||||
|
wflnsb %v0, %v0
|
||||||
|
wflpsb %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wflls %v0, %v0
|
||||||
|
|
||||||
|
wflls %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wflrd %v0, %v0, 0, 0
|
||||||
|
|
||||||
|
wflrd %v0, %v0, 0, 0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfmaxdb %v0, %v0, %v0, 0
|
#CHECK: wfmaxdb %v0, %v0, %v0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfmaxsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
wfmaxdb %v0, %v0, %v0, 0
|
wfmaxdb %v0, %v0, %v0, 0
|
||||||
|
wfmaxsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfmindb %v0, %v0, %v0, 0
|
#CHECK: wfmindb %v0, %v0, %v0, 0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfminsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
wfmindb %v0, %v0, %v0, 0
|
wfmindb %v0, %v0, %v0, 0
|
||||||
|
wfminsb %v0, %v0, %v0, 0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfmsb %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfmsb %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfnmadb %v0, %v0, %v0, %v0
|
#CHECK: wfnmadb %v0, %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfnmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
wfnmadb %v0, %v0, %v0, %v0
|
wfnmadb %v0, %v0, %v0, %v0
|
||||||
|
wfnmasb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: instruction requires: vector-enhancements-1
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
#CHECK: wfnmsdb %v0, %v0, %v0, %v0
|
#CHECK: wfnmsdb %v0, %v0, %v0, %v0
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfnmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
wfnmsdb %v0, %v0, %v0, %v0
|
wfnmsdb %v0, %v0, %v0, %v0
|
||||||
|
wfnmssb %v0, %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfssb %v0, %v0, %v0
|
||||||
|
|
||||||
|
wfssb %v0, %v0, %v0
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wfsqsb %v0, %v0
|
||||||
|
|
||||||
|
wfsqsb %v0, %v0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: wftcidb %v0, %v0, -1
|
#CHECK: wftcidb %v0, %v0, -1
|
||||||
|
@ -2673,6 +2930,11 @@
|
||||||
wftcidb %v0, %v0, -1
|
wftcidb %v0, %v0, -1
|
||||||
wftcidb %v0, %v0, 4096
|
wftcidb %v0, %v0, 4096
|
||||||
|
|
||||||
|
#CHECK: error: instruction requires: vector-enhancements-1
|
||||||
|
#CHECK: wftcisb %v0, %v0, 0
|
||||||
|
|
||||||
|
wftcisb %v0, %v0, 0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: wledb %v0, %v0, 0, -1
|
#CHECK: wledb %v0, %v0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
|
|
@ -213,6 +213,68 @@
|
||||||
vdp %v0, %v0, %v0, -1, 0
|
vdp %v0, %v0, %v0, -1, 0
|
||||||
vdp %v0, %v0, %v0, 256, 0
|
vdp %v0, %v0, %v0, 256, 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfisb %v0, %v0, 0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfisb %v0, %v0, 0, 16
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfisb %v0, %v0, -1, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfisb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
vfisb %v0, %v0, 0, -1
|
||||||
|
vfisb %v0, %v0, 0, 16
|
||||||
|
vfisb %v0, %v0, -1, 0
|
||||||
|
vfisb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfll %v0, %v0, 0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfll %v0, %v0, 0, 16
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfll %v0, %v0, -1, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfll %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
vfll %v0, %v0, 0, -1
|
||||||
|
vfll %v0, %v0, 0, 16
|
||||||
|
vfll %v0, %v0, -1, 0
|
||||||
|
vfll %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflr %v0, %v0, 0, 0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflr %v0, %v0, 0, 0, 16
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflr %v0, %v0, 0, -1, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflr %v0, %v0, 0, 16, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflr %v0, %v0, -1, 0, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflr %v0, %v0, 16, 0, 0
|
||||||
|
|
||||||
|
vflr %v0, %v0, 0, 0, -1
|
||||||
|
vflr %v0, %v0, 0, 0, 16
|
||||||
|
vflr %v0, %v0, 0, -1, 0
|
||||||
|
vflr %v0, %v0, 0, 16, 0
|
||||||
|
vflr %v0, %v0, -1, 0, 0
|
||||||
|
vflr %v0, %v0, 16, 0, 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflrd %v0, %v0, 0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflrd %v0, %v0, 0, 16
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflrd %v0, %v0, -1, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vflrd %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
vflrd %v0, %v0, 0, -1
|
||||||
|
vflrd %v0, %v0, 0, 16
|
||||||
|
vflrd %v0, %v0, -1, 0
|
||||||
|
vflrd %v0, %v0, 16, 0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vfmax %v0, %v0, %v0, 0, 0, -1
|
#CHECK: vfmax %v0, %v0, %v0, 0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -241,6 +303,14 @@
|
||||||
vfmaxdb %v0, %v0, %v0, -1
|
vfmaxdb %v0, %v0, %v0, -1
|
||||||
vfmaxdb %v0, %v0, %v0, 16
|
vfmaxdb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfmaxsb %v0, %v0, %v0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfmaxsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
vfmaxsb %v0, %v0, %v0, -1
|
||||||
|
vfmaxsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vfmin %v0, %v0, %v0, 0, 0, -1
|
#CHECK: vfmin %v0, %v0, %v0, 0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -269,6 +339,14 @@
|
||||||
vfmindb %v0, %v0, %v0, -1
|
vfmindb %v0, %v0, %v0, -1
|
||||||
vfmindb %v0, %v0, %v0, 16
|
vfmindb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfminsb %v0, %v0, %v0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vfminsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
vfminsb %v0, %v0, %v0, -1
|
||||||
|
vfminsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vfnma %v0, %v0, %v0, %v0, 0, -1
|
#CHECK: vfnma %v0, %v0, %v0, %v0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -297,6 +375,14 @@
|
||||||
vfnms %v0, %v0, %v0, %v0, -1, 0
|
vfnms %v0, %v0, %v0, %v0, -1, 0
|
||||||
vfnms %v0, %v0, %v0, %v0, 16, 0
|
vfnms %v0, %v0, %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vftcisb %v0, %v0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: vftcisb %v0, %v0, 4096
|
||||||
|
|
||||||
|
vftcisb %v0, %v0, -1
|
||||||
|
vftcisb %v0, %v0, 4096
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: vlip %v0, 0, -1
|
#CHECK: vlip %v0, 0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -544,6 +630,34 @@
|
||||||
vupkz %v0, 4096, 0
|
vupkz %v0, 4096, 0
|
||||||
vupkz %v0, 0(%r0), 0
|
vupkz %v0, 0(%r0), 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfisb %v0, %v0, 0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfisb %v0, %v0, 0, 16
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfisb %v0, %v0, -1, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfisb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
wfisb %v0, %v0, 0, -1
|
||||||
|
wfisb %v0, %v0, 0, 16
|
||||||
|
wfisb %v0, %v0, -1, 0
|
||||||
|
wfisb %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wflrd %v0, %v0, 0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wflrd %v0, %v0, 0, 16
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wflrd %v0, %v0, -1, 0
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wflrd %v0, %v0, 16, 0
|
||||||
|
|
||||||
|
wflrd %v0, %v0, 0, -1
|
||||||
|
wflrd %v0, %v0, 0, 16
|
||||||
|
wflrd %v0, %v0, -1, 0
|
||||||
|
wflrd %v0, %v0, 16, 0
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: wfmaxdb %v0, %v0, %v0, -1
|
#CHECK: wfmaxdb %v0, %v0, %v0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -552,6 +666,14 @@
|
||||||
wfmaxdb %v0, %v0, %v0, -1
|
wfmaxdb %v0, %v0, %v0, -1
|
||||||
wfmaxdb %v0, %v0, %v0, 16
|
wfmaxdb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfmaxsb %v0, %v0, %v0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfmaxsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
wfmaxsb %v0, %v0, %v0, -1
|
||||||
|
wfmaxsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
#CHECK: wfmindb %v0, %v0, %v0, -1
|
#CHECK: wfmindb %v0, %v0, %v0, -1
|
||||||
#CHECK: error: invalid operand
|
#CHECK: error: invalid operand
|
||||||
|
@ -560,3 +682,19 @@
|
||||||
wfmindb %v0, %v0, %v0, -1
|
wfmindb %v0, %v0, %v0, -1
|
||||||
wfmindb %v0, %v0, %v0, 16
|
wfmindb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfminsb %v0, %v0, %v0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wfminsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
wfminsb %v0, %v0, %v0, -1
|
||||||
|
wfminsb %v0, %v0, %v0, 16
|
||||||
|
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wftcisb %v0, %v0, -1
|
||||||
|
#CHECK: error: invalid operand
|
||||||
|
#CHECK: wftcisb %v0, %v0, 4096
|
||||||
|
|
||||||
|
wftcisb %v0, %v0, -1
|
||||||
|
wftcisb %v0, %v0, 4096
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue