Enable STRICT_FP_TO_SINT/UINT on X86 backend
This patch mainly adds custom lowering for the vector operations.

Differential Revision: https://reviews.llvm.org/D71592
parent 97b5d6bfdc
commit 2f932b5729
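For context, a sketch of the kind of source this series is for (illustrative only; the flag and code below are not part of the patch): under strict floating-point exception semantics, e.g. Clang's -ffp-exception-behavior=strict, FP-to-integer casts are emitted as llvm.experimental.constrained.fptosi/fptoui rather than plain fptosi/fptoui, and SelectionDAG models them as STRICT_FP_TO_SINT/STRICT_FP_TO_UINT nodes. The vector forms of those nodes are what this patch teaches the X86 backend to legalize and select.

    // Illustrative only (assumed build: clang++ -O2 -ffp-exception-behavior=strict).
    // Each cast below becomes a constrained fptosi in IR, i.e. a STRICT_FP_TO_SINT
    // node in SelectionDAG; this patch adds the vector handling for such nodes.
    #include <cstdint>

    void to_int(const double *in, int32_t *out, int n) {
      for (int i = 0; i < n; ++i)
        out[i] = static_cast<int32_t>(in[i]);
    }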
@ -506,6 +506,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
  return PromoteINT_TO_FP(Op);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
  // Promote the operation by extending the operand.
  return PromoteFP_TO_INT(Op);
  }
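A minimal sketch of the node shape being dispatched here (DAG, dl, Chain and Src are assumed values, not names from the patch): unlike FP_TO_SINT, the strict form carries an incoming chain as operand 0 and produces a {value, chain} pair, which is why PromoteFP_TO_INT below has to thread a second result through.

    // Hedged sketch: building and consuming a strict fp-to-int node.
    SDVTList VTs = DAG.getVTList(MVT::v4i32, MVT::Other);
    SDValue Cvt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, VTs,
                              {Chain, Src});   // operand 0 = chain, operand 1 = value
    SDValue Result = Cvt.getValue(0);          // converted integers
    SDValue OutChain = Cvt.getValue(1);        // orders later strict FP operations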
@ -575,6 +577,7 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
  SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
  MVT VT = Op.getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ bool IsStrict = Op->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");
@ -585,17 +588,35 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
    NewOpc = ISD::FP_TO_SINT;

+ if (NewOpc == ISD::STRICT_FP_TO_UINT &&
+     TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+   NewOpc = ISD::STRICT_FP_TO_SINT;
+
  SDLoc dl(Op);
- SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
+ SDValue Promoted, Chain;
+ if (IsStrict) {
+   Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+                          {Op.getOperand(0), Op.getOperand(1)});
+   Chain = Promoted.getValue(1);
+ } else
+   Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
+
  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
- Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
-                                                           : ISD::AssertSext,
-                        dl, NVT, Promoted,
+ if (Op->getOpcode() == ISD::FP_TO_UINT ||
+     Op->getOpcode() == ISD::STRICT_FP_TO_UINT)
+   NewOpc = ISD::AssertZext;
+ else
+   NewOpc = ISD::AssertSext;
+
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
                         DAG.getValueType(VT.getScalarType()));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ if (IsStrict)
+   return DAG.getMergeValues({Promoted, Chain}, dl);
+
+ return Promoted;
  }

  SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
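To make the assert-and-truncate tail above concrete (values assumed): an FP_TO_UINT with a v2i8 result is first performed at the promoted v2i32 type, AssertZext(i8) then records that the result already fits in 8 unsigned bits, so the final TRUNCATE is known to be lossless; for an out-of-range input the original operation was undefined anyway. In the strict case the chain captured from the promoted node is returned next to the truncated value, roughly:

    // Hedged sketch of the strict tail (DAG, dl, VT, NVT, NewOpc and a strict Op
    // as in PromoteFP_TO_INT above are assumed).
    SDValue Promoted = DAG.getNode(NewOpc, dl, DAG.getVTList(NVT, MVT::Other),
                                   {Op.getOperand(0), Op.getOperand(1)});
    SDValue Chain = Promoted.getValue(1);          // exception-ordering chain
    Promoted = DAG.getNode(ISD::AssertZext, dl, NVT, Promoted,
                           DAG.getValueType(VT.getScalarType()));
    Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
    SDValue StrictResult = DAG.getMergeValues({Promoted, Chain}, dl);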
@ -816,7 +816,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {

  switch (N->getOpcode()) {
  case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT: {
  // Replace vector fp_to_s/uint with their X86 specific equivalent so we
  // don't need 2 sets of patterns.
  if (!N->getSimpleValueType(0).isVector())
@ -825,13 +827,27 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
  unsigned NewOpc;
  switch (N->getOpcode()) {
  default: llvm_unreachable("Unexpected opcode!");
+ case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
+ case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
  }
- SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
-                               N->getOperand(0));
+ SDValue Res;
+ if (N->isStrictFPOpcode())
+   Res =
+       CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
+                       {N->getOperand(0), N->getOperand(1)});
+ else
+   Res =
+       CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
+                       {CurDAG->getEntryNode(), N->getOperand(0)});
  --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ if (N->isStrictFPOpcode()) {
+   SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
+   SDValue To[] = {Res.getValue(0), Res.getValue(1)};
+   CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+   CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
  ++I;
  CurDAG->DeleteNode(N);
  continue;
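A short sketch of the entry-token trick used above (CurDAG and N as in PreprocessISelDAG; nothing new is being added here): X86ISD::CVTTP2SI/CVTTP2UI now always produce {value, chain}, so a non-strict FP_TO_SINT, which has no chain of its own, is handed the graph's entry token as a harmless incoming chain that imposes no ordering.

    // Hedged sketch: wrapping a chain-less fp_to_sint into the chained X86 node.
    SDValue Res = CurDAG->getNode(X86ISD::CVTTP2SI, SDLoc(N),
                                  {N->getValueType(0), MVT::Other},
                                  {CurDAG->getEntryNode(), N->getOperand(0)});
    SDValue Converted = Res.getValue(0); // Res.getValue(1) is the harmless chain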
@ -979,18 +979,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);

  // Custom legalize these to avoid over promotion or custom promotion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
+   setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+   setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+   setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
+   setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
+ }

  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
  setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
@ -1164,9 +1162,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

  // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
  // even though v8i16 is a legal type.
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);

  setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
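For readers less familiar with the hook used here, a hedged illustration of what one of these calls does (the types mirror the lines above): it marks the operation Promote for the narrow result type and records the wider type, so legalization rewrites (fp_to_sint:v8i16 x) roughly as (truncate:v8i16 (fp_to_sint:v8i32 x)) instead of scalarizing, and the strict variants now receive the same treatment.

    // Sketch: a single promoted-type registration and what it is shorthand for.
    setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
    // ...is roughly equivalent to:
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Promote);
    AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);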
@ -1361,12 +1362,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
  setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);

- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);

  // There is no byte sized k-register load or store without AVX512DQ.
  if (!Subtarget.hasDQI()) {
@ -1440,16 +1447,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  setOperationAction(ISD::FCOPYSIGN, VT, Custom);
  }

- setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
- setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
+ for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
+   setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
+   setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
+   setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
+   setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
+ }
+ setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);

  setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
  setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
@ -1551,6 +1560,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);

  setOperationAction(ISD::MUL, MVT::v8i64, Legal);
  }
@ -1641,12 +1653,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

  if (Subtarget.hasDQI()) {
  for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::SINT_TO_FP, VT, Legal);
- setOperationAction(ISD::UINT_TO_FP, VT, Legal);
- setOperationAction(ISD::FP_TO_SINT, VT, Legal);
- setOperationAction(ISD::FP_TO_UINT, VT, Legal);
-
- setOperationAction(ISD::MUL, VT, Legal);
+ setOperationAction(ISD::SINT_TO_FP, VT, Legal);
+ setOperationAction(ISD::UINT_TO_FP, VT, Legal);
+ setOperationAction(ISD::FP_TO_SINT, VT, Legal);
+ setOperationAction(ISD::FP_TO_UINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
+ setOperationAction(ISD::MUL, VT, Legal);
  }
  }
@ -1821,8 +1834,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
         "Unexpected operation action!");
  // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
  }

  if (Subtarget.hasBWI()) {
@ -19739,31 +19754,57 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
  MVT ResVT = MVT::v4i32;
  MVT TruncVT = MVT::v4i1;
- unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
  if (!IsSigned && !Subtarget.hasVLX()) {
  // Widen to 512-bits.
  ResVT = MVT::v8i32;
  TruncVT = MVT::v8i1;
- Opc = ISD::FP_TO_UINT;
+ unsigned Opc = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT;
  Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
                    DAG.getUNDEF(MVT::v8f64),
                    Src, DAG.getIntPtrConstant(0, dl));
+ SDValue Res, Chain;
+ if (IsStrict) {
+   Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other},
+                     {Op.getOperand(0), Src});
+   Chain = Res.getValue(1);
+ } else
+   Res = DAG.getNode(Opc, dl, ResVT, Src);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
+                   DAG.getIntPtrConstant(0, dl));
+ if (IsStrict)
+   return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
  }
- // FIXME: Strict fp!
- assert(!IsStrict && "Unhandled strict operation!");
- SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
+ SDValue Res, Chain;
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ if (IsStrict) {
+   Res =
+       DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+   Chain = Res.getValue(1);
+ } else
+   Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other},
+                     {DAG.getEntryNode(), Src});
  Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
-                    DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
+                   DAG.getIntPtrConstant(0, dl));
+ if (IsStrict)
+   return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
  }

  assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
  if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
- // FIXME: Strict fp!
- assert(!IsStrict && "Unhandled strict operation!");
- return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
-                    DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
-                                DAG.getUNDEF(MVT::v2f32)));
+ SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+                           DAG.getUNDEF(MVT::v2f32));
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ SDValue Res, Chain;
+ if (IsStrict) {
+   Res = DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
+   Chain = Res.getValue(1);
+   return DAG.getMergeValues({Res, Chain}, dl);
+ }
+ return DAG.getNode(Opc, dl, {VT, MVT::Other}, {DAG.getEntryNode(), Tmp});
  }

  return SDValue();
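A compressed sketch of the non-VLX widening idiom used in the first branch above (DAG, dl and a v2f64 Src are assumed): the 128-bit source is inserted into the low lanes of an undef 512-bit vector so the plain AVX-512 conversion can be used, and only the low two lanes of the result are kept; the strict variant differs only in carrying the chain.

    // Hedged sketch: widen, convert, then extract for v2f64 -> v2i1 without AVX512VL.
    SDValue Wide = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
                               DAG.getUNDEF(MVT::v8f64), Src,
                               DAG.getIntPtrConstant(0, dl));
    SDValue Cvt  = DAG.getNode(ISD::FP_TO_UINT, dl, MVT::v8i32, Wide);
    SDValue Bits = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Cvt);
    SDValue Res  = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Bits,
                               DAG.getIntPtrConstant(0, dl));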
@ -23100,6 +23141,26 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
  return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
  }

+ // We share some nodes between STRICT and non STRICT FP intrinsics.
+ // For these nodes, we need chain them to entry token if they are not called
+ // by STRICT FP intrinsics.
+ static SDValue getProperNode(unsigned Opcode, const SDLoc &dl, EVT VT,
+                              ArrayRef<SDValue> Ops, SelectionDAG &DAG) {
+   switch (Opcode) {
+   default:
+     return DAG.getNode(Opcode, dl, VT, Ops);
+   case X86ISD::CVTTP2SI:
+   case X86ISD::CVTTP2UI:
+   case X86ISD::CMPP:
+   case X86ISD::CMPM:
+     break;
+   }
+
+   SmallVector<SDValue, 6> NewOps = {DAG.getEntryNode()};
+   NewOps.append(Ops.begin(), Ops.end());
+   return DAG.getNode(Opcode, dl, {VT, MVT::Other}, NewOps);
+ }
+
  SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // Helper to detect if the operand is CUR_DIRECTION rounding mode.
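A hedged usage sketch of the new helper (dl, Src and DAG are assumed; this mirrors how the later hunks call it): for the listed opcodes it prepends the entry token and builds a {VT, MVT::Other} node, so callers that have no chain of their own still satisfy the nodes' chained form; for any other opcode it is just DAG.getNode.

    // Sketch: non-strict intrinsic lowering building a now-chained conversion node.
    SDValue Cvt = getProperNode(X86ISD::CVTTP2UI, dl, MVT::v4i32, Src, DAG);
    SDValue Value = Cvt.getValue(0);  // converted vector
    // For the chained opcodes, Cvt.getValue(1) is a chain rooted at the entry token.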
@ -23144,23 +23205,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  MVT VT = Op.getSimpleValueType();
  const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);

- // We share some nodes between STRICT and non STRICT FP intrinsics.
- // For these nodes, we need chain them to entry token if they are not called
- // by STRICT FP intrinsics.
- auto getProperNode = [&](unsigned Opcode, EVT VT, ArrayRef<SDValue> Ops) {
-   switch (Opcode) {
-   default:
-     return DAG.getNode(Opcode, dl, VT, Ops);
-   case X86ISD::CMPP:
-   case X86ISD::CMPM:
-     break;
-   }
-
-   SmallVector<SDValue, 6> NewOps = {DAG.getEntryNode()};
-   NewOps.append(Ops.begin(), Ops.end());
-   return DAG.getNode(Opcode, dl, {VT, MVT::Other}, NewOps);
- };
-
  if (IntrData) {
  switch(IntrData->Type) {
  case INTR_TYPE_1OP: {
@ -23178,7 +23222,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  if (!isRoundModeCurDirection(Rnd))
  return SDValue();
  }
- return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
+ return getProperNode(IntrData->Opc0, dl, Op.getValueType(),
+                      Op.getOperand(1), DAG);
  }
  case INTR_TYPE_1OP_SAE: {
  SDValue Sae = Op.getOperand(2);
@ -23249,8 +23294,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  return SDValue();
  }

- return getProperNode(IntrData->Opc0, Op.getValueType(),
-                      {Src1, Src2, Src3});
+ return getProperNode(IntrData->Opc0, dl, Op.getValueType(),
+                      {Src1, Src2, Src3}, DAG);
  }
  case INTR_TYPE_4OP:
  return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
@ -23274,8 +23319,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  if (!isRoundModeCurDirection(Rnd))
  return SDValue();
  }
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
-                             Mask, PassThru, Subtarget, DAG);
+ return getVectorMaskingNode(
+     getProperNode(IntrData->Opc0, dl, VT, Src, DAG), Mask, PassThru,
+     Subtarget, DAG);
  }
  case INTR_TYPE_1OP_MASK_SAE: {
  SDValue Src = Op.getOperand(1);
@ -23291,8 +23337,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  else
  return SDValue();

- return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src),
-                             Mask, PassThru, Subtarget, DAG);
+ return getVectorMaskingNode(getProperNode(Opc, dl, VT, Src, DAG), Mask,
+                             PassThru, Subtarget, DAG);
  }
  case INTR_TYPE_SCALAR_MASK: {
  SDValue Src1 = Op.getOperand(1);
@ -23498,8 +23544,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  return SDValue();
  }
  //default rounding mode
- return getProperNode(IntrData->Opc0, MaskVT,
-                      {Op.getOperand(1), Op.getOperand(2), CC});
+ return getProperNode(IntrData->Opc0, dl, MaskVT,
+                      {Op.getOperand(1), Op.getOperand(2), CC}, DAG);
  }
  case CMP_MASK_SCALAR_CC: {
  SDValue Src1 = Op.getOperand(1);
@ -23694,13 +23740,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  SDValue Mask = Op.getOperand(3);

  if (isAllOnesConstant(Mask))
- return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+ return getProperNode(IntrData->Opc0, dl, Op.getValueType(), Src, DAG);

  MVT SrcVT = Src.getSimpleValueType();
  MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
  Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
-                    Mask);
+ return getProperNode(IntrData->Opc1, dl, Op.getValueType(),
+                      {Src, PassThru, Mask}, DAG);
  }
  case CVTPS2PH_MASK: {
  SDValue Src = Op.getOperand(1);
@ -28566,8 +28612,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
  SDValue Res;
  SDValue Chain;
  if (IsStrict) {
- Res = DAG.getNode(ISD::FP_TO_SINT, dl, { PromoteVT, MVT::Other },
-                   { N->getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {PromoteVT, MVT::Other},
+                   {N->getOperand(0), Src});
  Chain = Res.getValue(1);
  } else
  Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
@ -28610,11 +28656,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
  // legalization to v8i32<-v8f64.
  return;
  }
- // FIXME: Strict fp.
- assert(!IsStrict && "Missing STRICT_FP_TO_SINT support!");
  unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
- SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
+ SDValue Res;
+ SDValue Chain;
+ if (IsStrict) {
+   Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other},
+                     {N->getOperand(0), Src});
+   Chain = Res.getValue(1);
+ } else
+   Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other},
+                     {DAG.getEntryNode(), Src});
  Results.push_back(Res);
+ if (IsStrict)
+   Results.push_back(Chain);
  return;
  }
@ -34719,7 +34773,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
  break;
  case X86ISD::CVTP2SI: case X86ISD::CVTP2UI:
  case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI:
- case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI:
  case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
  case X86ISD::CVTSI2P: case X86ISD::CVTUI2P:
  case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P:
@ -34728,6 +34781,12 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
      In.getOperand(0).getValueType() == MVT::v2i64)
  return N->getOperand(0); // return the bitcast
  break;
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI:
+   if (In.getOperand(1).getValueType() == MVT::v2f64 ||
+       In.getOperand(1).getValueType() == MVT::v2i64)
+     return N->getOperand(0);
+   break;
  }
  }
@ -42431,12 +42490,16 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
  EVT VT = N->getValueType(0);

  // Convert a full vector load into vzload when not all bits are needed.
- SDValue In = N->getOperand(0);
+ SDValue In;
+ if (N->getOpcode() == X86ISD::CVTTP2SI || N->getOpcode() == X86ISD::CVTTP2UI)
+   In = N->getOperand(1);
+ else
+   In = N->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
  assert(InVT.is128BitVector() && "Expected 128-bit input vector");
- LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ LoadSDNode *LN = cast<LoadSDNode>(In);
  // Unless the load is volatile or atomic.
  if (LN->isSimple()) {
  SDLoc dl(N);
@ -42450,9 +42513,13 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
                         LN->getPointerInfo(),
                         LN->getAlignment(),
                         LN->getMemOperand()->getFlags());
- SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
-                               DAG.getBitcast(InVT, VZLoad));
- DCI.CombineTo(N, Convert);
+ SDValue Convert = getProperNode(N->getOpcode(), dl, VT,
+                                 DAG.getBitcast(InVT, VZLoad), DAG);
+ if (Convert->getOpcode() == X86ISD::CVTTP2SI ||
+     Convert->getOpcode() == X86ISD::CVTTP2UI)
+   DCI.CombineTo(N, Convert.getValue(0), Convert.getValue(1));
+ else
+   DCI.CombineTo(N, Convert);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
  return SDValue(N, 0);
  }
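The VZLoad referenced above is created a few lines earlier, outside the shown context; a rough, assumed reconstruction of that idiom for one case, so the combine is easier to follow: only the low 64 bits of the 128-bit load are needed, so the full load is replaced by a zero-extending scalar load and the conversion is rebuilt, now through getProperNode so the chained CVTTP2SI/CVTTP2UI forms get both of their results replaced.

    // Hedged reconstruction (types assumed for a v2f64 -> v4i32 conversion of a
    // 128-bit load where only the low 64 bits are used); not verbatim patch code.
    SDVTList Tys = DAG.getVTList(MVT::v2f64, MVT::Other);
    SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
    SDValue VZLoad = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
                                             MVT::f64, LN->getPointerInfo(),
                                             LN->getAlignment(),
                                             LN->getMemOperand()->getFlags());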
@ -7350,29 +7350,29 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
|
|||
}
|
||||
|
||||
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
|
||||
fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
|
||||
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
|
||||
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
|
||||
fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
|
||||
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
|
||||
"{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
|
||||
fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
|
||||
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
|
||||
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
|
||||
fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
|
||||
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
|
||||
"{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
|
||||
fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
|
||||
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
|
||||
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
|
||||
fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
|
||||
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
|
||||
"{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
|
||||
fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
|
||||
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
|
||||
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
|
||||
fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
|
||||
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
|
||||
"{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -623,8 +623,8 @@ def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
  // Vector without rounding mode

  // cvtt fp-to-int staff
- def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
- def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
+ def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>;
+ def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>;

  def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
  def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
@ -868,19 +868,19 @@ let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
|
|||
}
|
||||
|
||||
let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
|
||||
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
|
||||
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si", "cvttss2si",
|
||||
WriteCvtSS2I, SSEPackedSingle>,
|
||||
XS, VEX, VEX_LIG;
|
||||
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
|
||||
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si", "cvttss2si",
|
||||
WriteCvtSS2I, SSEPackedSingle>,
|
||||
XS, VEX, VEX_W, VEX_LIG;
|
||||
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
|
||||
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si", "cvttsd2si",
|
||||
WriteCvtSD2I, SSEPackedDouble>,
|
||||
XD, VEX, VEX_LIG;
|
||||
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
|
||||
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si", "cvttsd2si",
|
||||
WriteCvtSD2I, SSEPackedDouble>,
|
||||
XD, VEX, VEX_W, VEX_LIG;
|
||||
|
@ -926,16 +926,16 @@ let Predicates = [UseAVX] in {
|
|||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
|
||||
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si", "cvttss2si",
|
||||
WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
|
||||
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
|
||||
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si", "cvttss2si",
|
||||
WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
|
||||
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
|
||||
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si", "cvttsd2si",
|
||||
WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
|
||||
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
|
||||
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si", "cvttsd2si",
|
||||
WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
|
||||
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
|
||||
|
@ -1595,9 +1595,9 @@ def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
|
|||
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
|
||||
def : Pat<(v4i32 (any_fp_to_sint (v4f64 VR256:$src))),
|
||||
(VCVTTPD2DQYrr VR256:$src)>;
|
||||
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
|
||||
def : Pat<(v4i32 (any_fp_to_sint (loadv4f64 addr:$src))),
|
||||
(VCVTTPD2DQYrm addr:$src)>;
|
||||
}
|
||||
|
||||
|
|
|
@ -4322,17 +4322,21 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i
|
|||
define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
|
||||
; X64-NEXT: vcvttpd2dq %zmm0, %ymm2
|
||||
; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X64-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
|
||||
; X86-NEXT: vcvttpd2dq %zmm0, %ymm2
|
||||
; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X86-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
|
||||
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; X86-NEXT: retl
|
||||
|
@ -4373,17 +4377,21 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>,
|
|||
define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
|
||||
; X64-NEXT: vcvttpd2udq %zmm0, %ymm2
|
||||
; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X64-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
|
||||
; X86-NEXT: vcvttpd2udq %zmm0, %ymm2
|
||||
; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X86-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
|
||||
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; X86-NEXT: retl
|
||||
|
@ -4399,7 +4407,8 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
|
||||
; X64-NEXT: vcvttps2dq %zmm0, %zmm2
|
||||
; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X64-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
|
||||
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
|
@ -4407,7 +4416,8 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X86-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
|
||||
; X86-NEXT: vcvttps2dq %zmm0, %zmm2
|
||||
; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X86-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
|
||||
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; X86-NEXT: retl
|
||||
|
@ -4423,7 +4433,8 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
|
||||
; X64-NEXT: vcvttps2udq %zmm0, %zmm2
|
||||
; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X64-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
|
||||
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
|
@ -4431,7 +4442,8 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X86-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
|
||||
; X86-NEXT: vcvttps2udq %zmm0, %zmm2
|
||||
; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
|
||||
; X86-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
|
||||
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0
|
||||
; X86-NEXT: retl
|
||||
|
|
|
@ -286,7 +286,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
|
||||
; X86-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X86-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
|
||||
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
|
@ -294,7 +295,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
|
||||
; X64-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X64-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
|
||||
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
|
@ -310,7 +312,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
|
||||
; X86-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X86-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
|
||||
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
|
@ -318,7 +321,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
|
||||
; X64-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X64-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
|
||||
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
|
@ -334,7 +338,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
|
||||
; X86-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X86-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
|
||||
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
|
@ -342,7 +347,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
|
||||
; X64-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X64-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
|
||||
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
|
@ -358,7 +364,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
|
||||
; X86-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X86-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
|
||||
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
|
@ -366,7 +373,8 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
|
||||
; X64-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
|
||||
; X64-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
|
||||
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
|
|
|
@ -599,17 +599,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i
|
|||
define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
|
||||
; X86-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
|
||||
; X64-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
|
||||
|
@ -623,17 +623,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i
|
|||
define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
|
||||
; X86-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
|
||||
; X64-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
|
||||
|
@ -647,17 +647,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>,
|
|||
define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
|
||||
; X86-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
|
||||
; X64-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
|
||||
|
@ -671,17 +671,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>,
|
|||
define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
|
||||
; X86-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
|
||||
; X64-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
|
||||
|
@ -695,17 +695,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8
|
|||
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
|
||||
; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
|
||||
; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
|
||||
|
@ -735,14 +735,16 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p,
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -754,14 +756,16 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p,
|
|||
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -791,14 +795,16 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -810,14 +816,16 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %
|
|||
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -845,14 +853,16 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3(<4 x float>* %p
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <4 x float>, <4 x float>* %p
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
|
||||
|
@ -863,14 +873,16 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3(<4 x float>* %
|
|||
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
|
||||
; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <4 x float>, <4 x float>* %p
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
|
||||
|
@ -882,17 +894,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8
|
|||
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
|
||||
; X86-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
|
||||
; X64-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
|
||||
|
@ -983,17 +995,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i
|
|||
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
|
||||
; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
|
||||
; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
|
||||
|
@ -1023,14 +1035,16 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p,
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -1042,14 +1056,16 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p
|
|||
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -1079,14 +1095,16 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -1098,14 +1116,16 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>*
|
|||
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <2 x float>, <2 x float>* %p
|
||||
%x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -1133,14 +1153,16 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3(<4 x float>* %
|
|||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <4 x float>, <4 x float>* %p
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
|
||||
|
@ -1151,14 +1173,16 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3(<4 x float>*
|
|||
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
|
||||
; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
|
||||
; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
|
||||
; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%x0 = load <4 x float>, <4 x float>* %p
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
|
||||
|
@ -1170,17 +1194,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i
|
|||
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
|
||||
; X86-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
|
||||
; X64-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
|
||||
|
|
|
@ -682,15 +682,17 @@ define <2 x i64> @test_mm256_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U,
; X86-LABEL: test_mm256_mask_cvttpd_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: vcvttpd2dq %ymm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvttpd_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvttpd2dq %ymm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
@ -707,15 +709,17 @@ define <2 x i64> @test_mm256_maskz_cvttpd_epi32(i8 zeroext %__U, <4 x double> %_
|
|||
; X86-LABEL: test_mm256_maskz_cvttpd_epi32:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: vcvttpd2dq %ymm0, %xmm0
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_maskz_cvttpd_epi32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvttpd2dq %ymm0, %xmm0
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -794,14 +798,16 @@ define <2 x i64> @test_mm256_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U,
|
|||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
|
||||
; X86-NEXT: vcvttpd2udq %ymm1, %xmm1
|
||||
; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_mask_cvttpd_epu32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
|
||||
; X64-NEXT: vcvttpd2udq %ymm1, %xmm1
|
||||
; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -816,14 +822,16 @@ define <2 x i64> @test_mm256_maskz_cvttpd_epu32(i8 zeroext %__U, <4 x double> %_
|
|||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: vcvttpd2udq %ymm0, %xmm0
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_maskz_cvttpd_epu32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: vcvttpd2udq %ymm0, %xmm0
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -836,14 +844,16 @@ define <2 x i64> @test_mm_mask_cvttps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4
|
|||
; X86-LABEL: test_mm_mask_cvttps_epi32:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: vcvttps2dq %xmm1, %xmm1
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
|
||||
; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_mask_cvttps_epi32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvttps2dq %xmm1, %xmm1
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
|
||||
; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8
|
||||
|
@ -859,14 +869,16 @@ define <2 x i64> @test_mm_maskz_cvttps_epi32(i8 zeroext %__U, <4 x float> %__A)
|
|||
; X86-LABEL: test_mm_maskz_cvttps_epi32:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_maskz_cvttps_epi32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvttps2dq %xmm0, %xmm0
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8
|
||||
|
@ -881,14 +893,16 @@ define <4 x i64> @test_mm256_mask_cvttps_epi32(<4 x i64> %__W, i8 zeroext %__U,
|
|||
; X86-LABEL: test_mm256_mask_cvttps_epi32:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: vcvttps2dq %ymm1, %ymm1
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
|
||||
; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_mask_cvttps_epi32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvttps2dq %ymm1, %ymm1
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
|
||||
; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8
|
||||
|
@ -903,14 +917,16 @@ define <4 x i64> @test_mm256_maskz_cvttps_epi32(i8 zeroext %__U, <8 x float> %__
|
|||
; X86-LABEL: test_mm256_maskz_cvttps_epi32:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
|
||||
; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_maskz_cvttps_epi32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvttps2dq %ymm0, %ymm0
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
|
||||
; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8
|
||||
|
@ -936,13 +952,15 @@ define <2 x i64> @test_mm_mask_cvttps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4
|
|||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
|
||||
; X86-NEXT: vcvttps2udq %xmm1, %xmm1
|
||||
; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_mask_cvttps_epu32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
|
||||
; X64-NEXT: vcvttps2udq %xmm1, %xmm1
|
||||
; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <2 x i64> %__W to <4 x i32>
|
||||
|
@ -956,13 +974,15 @@ define <2 x i64> @test_mm_maskz_cvttps_epu32(i8 zeroext %__U, <4 x float> %__A)
|
|||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: vcvttps2udq %xmm0, %xmm0
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_maskz_cvttps_epu32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: vcvttps2udq %xmm0, %xmm0
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 %__U) #8
|
||||
|
@ -986,13 +1006,15 @@ define <4 x i64> @test_mm256_mask_cvttps_epu32(<4 x i64> %__W, i8 zeroext %__U,
|
|||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
|
||||
; X86-NEXT: vcvttps2udq %ymm1, %ymm1
|
||||
; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_mask_cvttps_epu32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
|
||||
; X64-NEXT: vcvttps2udq %ymm1, %ymm1
|
||||
; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <4 x i64> %__W to <8 x i32>
|
||||
|
@ -1006,13 +1028,15 @@ define <4 x i64> @test_mm256_maskz_cvttps_epu32(i8 zeroext %__U, <8 x float> %__
|
|||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
|
||||
; X86-NEXT: vcvttps2udq %ymm0, %ymm0
|
||||
; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_maskz_cvttps_epu32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
|
||||
; X64-NEXT: vcvttps2udq %ymm0, %ymm0
|
||||
; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 %__U) #8
|
||||
|
|
|
@ -7859,7 +7859,6 @@ define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i3
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]

%res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
%res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
@ -10375,20 +10374,20 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i
|
|||
define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
|
||||
; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
|
||||
; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
|
||||
; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
|
||||
; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
|
||||
|
@ -10402,19 +10401,19 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8
|
|||
define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
|
||||
; X86-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
|
||||
; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
|
||||
; X64-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
|
||||
; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
|
||||
|
@ -10427,19 +10426,19 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8
|
|||
define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
|
||||
; X86-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
|
||||
; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
|
||||
; X86-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
|
||||
; X64-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
|
||||
; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
|
||||
; X64-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
|
||||
|
|
|
@ -3704,8 +3704,8 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x
|
|||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
|
||||
; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
|
@ -3713,8 +3713,8 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x
|
|||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
|
||||
; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
|
@ -3731,16 +3731,16 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x
|
|||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
|
||||
; X86-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
|
||||
; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
|
||||
; X64-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
|
||||
|
@ -3756,16 +3756,16 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x
|
|||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
|
||||
; X86-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
|
||||
; X86-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
|
||||
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
|
||||
; X64-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
|
||||
; X64-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
|
||||
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
|
||||
|
|
File diff suppressed because it is too large
|
@ -11,6 +11,16 @@ declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double, metad
declare x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80, metadata, metadata)
declare double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80, metadata, metadata)
declare i1 @llvm.experimental.constrained.fptosi.i1.x86_fp80(x86_fp80, metadata)
declare i8 @llvm.experimental.constrained.fptosi.i8.x86_fp80(x86_fp80, metadata)
declare i16 @llvm.experimental.constrained.fptosi.i16.x86_fp80(x86_fp80, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.x86_fp80(x86_fp80, metadata)
declare i64 @llvm.experimental.constrained.fptosi.i64.x86_fp80(x86_fp80, metadata)
declare i1 @llvm.experimental.constrained.fptoui.i1.x86_fp80(x86_fp80, metadata)
declare i8 @llvm.experimental.constrained.fptoui.i8.x86_fp80(x86_fp80, metadata)
declare i16 @llvm.experimental.constrained.fptoui.i16.x86_fp80(x86_fp80, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.x86_fp80(x86_fp80, metadata)
declare i64 @llvm.experimental.constrained.fptoui.i64.x86_fp80(x86_fp80, metadata)
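The declarations above are the constrained (strict) conversion intrinsics that the new tests in this file exercise; their single metadata operand selects the FP exception behavior. For reference only, a minimal sketch of how such an intrinsic is called at the IR level (the function name is hypothetical and is not part of the committed test):

define i32 @sketch_fp80_to_i32(x86_fp80 %x) strictfp {
  ; Strict conversion: the metadata operand requests strict exception semantics,
  ; so the conversion may not be speculated or reordered across FP-status accesses.
  %v = call i32 @llvm.experimental.constrained.fptosi.i32.x86_fp80(x86_fp80 %x, metadata !"fpexcept.strict") strictfp
  ret i32 %v
}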
define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp {
|
||||
; X86-LABEL: fadd_fp80:
|
||||
|
@ -190,4 +200,405 @@ define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp {
|
|||
ret x86_fp80 %ret
|
||||
}
|
||||
|
||||
define i1 @fp80_to_sint1(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_sint1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_sint1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; X64-NEXT: retq
|
||||
%result = call i1 @llvm.experimental.constrained.fptosi.i1.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i1 %result
|
||||
}
|
||||
|
||||
define i8 @fp80_to_sint8(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_sint8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_sint8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; X64-NEXT: retq
|
||||
%result = call i8 @llvm.experimental.constrained.fptosi.i8.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i8 %result
|
||||
}
|
||||
|
||||
define i16 @fp80_to_sint16(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_sint16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_sint16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: retq
|
||||
%result = call i16 @llvm.experimental.constrained.fptosi.i16.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i16 %result
|
||||
}
|
||||
|
||||
define i32 @fp80_to_sint32(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_sint32:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw (%esp)
|
||||
; X86-NEXT: movzwl (%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistpl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw (%esp)
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_sint32:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistpl -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%result = call i32 @llvm.experimental.constrained.fptosi.i32.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define i64 @fp80_to_sint64(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_sint64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: fldt 8(%ebp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl %ebp, %esp
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_sint64:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-NEXT: retq
|
||||
%result = call i64 @llvm.experimental.constrained.fptosi.i64.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
define i1 @fp80_to_uint1(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_uint1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_uint1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; X64-NEXT: retq
|
||||
%result = call i1 @llvm.experimental.constrained.fptoui.i1.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i1 %result
|
||||
}
|
||||
|
||||
define i8 @fp80_to_uint8(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_uint8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_uint8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
; X64-NEXT: retq
|
||||
%result = call i8 @llvm.experimental.constrained.fptoui.i8.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i8 %result
|
||||
}
|
||||
|
||||
define i16 @fp80_to_uint16(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_uint16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: fldt {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fnstcw (%esp)
|
||||
; X86-NEXT: movzwl (%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistpl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw (%esp)
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: addl $8, %esp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_uint16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistpl -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: retq
|
||||
%result = call i16 @llvm.experimental.constrained.fptoui.i16.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i16 %result
|
||||
}
|
||||
|
||||
define i32 @fp80_to_uint32(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_uint32:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: fldt 8(%ebp)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %ebp, %esp
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_uint32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: orl $3072, %eax # imm = 0xC00
|
||||
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
|
||||
; X64-NEXT: retq
|
||||
%result = call i32 @llvm.experimental.constrained.fptoui.i32.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
|
||||
; X86-LABEL: fp80_to_uint64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_register %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: fldt 8(%ebp)
|
||||
; X86-NEXT: flds {{\.LCPI.*}}
|
||||
; X86-NEXT: fucom %st(1)
|
||||
; X86-NEXT: fnstsw %ax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: # kill: def $ah killed $ah killed $ax
|
||||
; X86-NEXT: sahf
|
||||
; X86-NEXT: setbe %al
|
||||
; X86-NEXT: fldz
|
||||
; X86-NEXT: ja .LBB18_2
|
||||
; X86-NEXT: # %bb.1:
|
||||
; X86-NEXT: fstp %st(0)
|
||||
; X86-NEXT: fldz
|
||||
; X86-NEXT: fxch %st(1)
|
||||
; X86-NEXT: .LBB18_2:
|
||||
; X86-NEXT: fstp %st(1)
|
||||
; X86-NEXT: fsubrp %st, %st(1)
|
||||
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: orl $3072, %ecx # imm = 0xC00
|
||||
; X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movb %al, %dl
|
||||
; X86-NEXT: shll $31, %edx
|
||||
; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %ebp, %esp
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa %esp, 4
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fp80_to_uint64:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: flds {{.*}}(%rip)
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: fucomi %st(1), %st
|
||||
; X64-NEXT: setbe %al
|
||||
; X64-NEXT: fldz
|
||||
; X64-NEXT: fxch %st(1)
|
||||
; X64-NEXT: fcmovnbe %st(1), %st
|
||||
; X64-NEXT: fstp %st(1)
|
||||
; X64-NEXT: fsubrp %st, %st(1)
|
||||
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
|
||||
; X64-NEXT: orl $3072, %ecx # imm = 0xC00
|
||||
; X64-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
|
||||
; X64-NEXT: shlq $63, %rax
|
||||
; X64-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
|
||||
; X64-NEXT: retq
|
||||
%result = call i64 @llvm.experimental.constrained.fptoui.i64.x86_fp80(x86_fp80 %x,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -0,0 +1,929 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64

declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f64(<8 x double>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f64(<8 x double>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f64(<8 x double>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float>, metadata)

declare <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f32(<16 x float>, metadata)
declare <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f32(<16 x float>, metadata)
declare <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float>, metadata)
declare <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float>, metadata)
declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float>, metadata)
declare <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float>, metadata)
declare <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float>, metadata)
declare <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float>, metadata)
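Every test that follows shares the same call shape: the conversion is a call to one of the constrained intrinsics declared above, the only metadata operand is the exception behavior, and the enclosing function carries attribute set #0 (presumably { strictfp }, as in the x86_fp80 file earlier in this commit). A minimal hypothetical sketch, not part of the committed file:

define <8 x i32> @sketch_strict_fptoui_v8f64_to_v8i32(<8 x double> %a) #0 {
  ; The metadata string pins down the FP exception semantics for this vector conversion.
  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double> %a, metadata !"fpexcept.strict")
  ret <8 x i32> %ret
}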
define <8 x i64> @strict_vector_fptosi_v8f64_to_v8i64(<8 x double> %a) #0 {
|
||||
; AVX512VL-32-LABEL: strict_vector_fptosi_v8f64_to_v8i64:
|
||||
; AVX512VL-32: # %bb.0:
|
||||
; AVX512VL-32-NEXT: pushl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512VL-32-NEXT: movl %esp, %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512VL-32-NEXT: andl $-8, %esp
|
||||
; AVX512VL-32-NEXT: subl $64, %esp
|
||||
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovhps %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovhps %xmm1, (%esp)
|
||||
; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl (%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll (%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; AVX512VL-32-NEXT: movl %ebp, %esp
|
||||
; AVX512VL-32-NEXT: popl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512VL-32-NEXT: retl
|
||||
;
|
||||
; AVX512VL-64-LABEL: strict_vector_fptosi_v8f64_to_v8i64:
|
||||
; AVX512VL-64: # %bb.0:
|
||||
; AVX512VL-64-NEXT: vextractf32x4 $3, %zmm0, %xmm1
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512VL-64-NEXT: vextractf32x4 $2, %zmm0, %xmm2
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f64(<8 x double> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i64> %ret
|
||||
}
|
||||
|
||||
define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
|
||||
; AVX512VL-32-LABEL: strict_vector_fptoui_v8f64_to_v8i64:
|
||||
; AVX512VL-32: # %bb.0:
|
||||
; AVX512VL-32-NEXT: pushl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512VL-32-NEXT: movl %esp, %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512VL-32-NEXT: pushl %ebx
|
||||
; AVX512VL-32-NEXT: pushl %edi
|
||||
; AVX512VL-32-NEXT: pushl %esi
|
||||
; AVX512VL-32-NEXT: andl $-8, %esp
|
||||
; AVX512VL-32-NEXT: subl $80, %esp
|
||||
; AVX512VL-32-NEXT: .cfi_offset %esi, -20
|
||||
; AVX512VL-32-NEXT: .cfi_offset %edi, -16
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
|
||||
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
|
||||
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm5, %xmm5 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm5, %xmm4, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm4, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, %edi
|
||||
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
|
||||
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm5, %xmm5 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm5, %xmm4, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm4, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, %esi
|
||||
; AVX512VL-32-NEXT: xorl %edx, %edx
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: setae %dl
|
||||
; AVX512VL-32-NEXT: shll $31, %edx
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm3
|
||||
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
|
||||
; AVX512VL-32-NEXT: xorl %ecx, %ecx
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm5, %xmm5 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm5, %xmm4, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp)
|
||||
; AVX512VL-32-NEXT: fldl (%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll (%esp)
|
||||
; AVX512VL-32-NEXT: setae %cl
|
||||
; AVX512VL-32-NEXT: shll $31, %ecx
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; AVX512VL-32-NEXT: xorl %eax, %eax
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %bl
|
||||
; AVX512VL-32-NEXT: kmovw %ebx, %k1
|
||||
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm0
|
||||
; AVX512VL-32-NEXT: setb %bl
|
||||
; AVX512VL-32-NEXT: kmovw %ebx, %k1
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: movzbl %al, %eax
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1
|
||||
; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; AVX512VL-32-NEXT: leal -12(%ebp), %esp
|
||||
; AVX512VL-32-NEXT: popl %esi
|
||||
; AVX512VL-32-NEXT: popl %edi
|
||||
; AVX512VL-32-NEXT: popl %ebx
|
||||
; AVX512VL-32-NEXT: popl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512VL-32-NEXT: retl
|
||||
;
|
||||
; AVX512VL-64-LABEL: strict_vector_fptoui_v8f64_to_v8i64:
|
||||
; AVX512VL-64: # %bb.0:
|
||||
; AVX512VL-64-NEXT: vextractf32x4 $3, %zmm0, %xmm1
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512VL-64-NEXT: vextractf32x4 $2, %zmm0, %xmm2
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
|
||||
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f64(<8 x double> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i64> %ret
|
||||
}
|
||||
|
||||
define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 {
|
||||
; AVX512VL-32-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
|
||||
; AVX512VL-32: # %bb.0:
|
||||
; AVX512VL-32-NEXT: pushl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512VL-32-NEXT: movl %esp, %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512VL-32-NEXT: andl $-8, %esp
|
||||
; AVX512VL-32-NEXT: subl $64, %esp
|
||||
; AVX512VL-32-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovd %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $1, %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $3, %xmm1, (%esp)
|
||||
; AVX512VL-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds (%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll (%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; AVX512VL-32-NEXT: movl %ebp, %esp
|
||||
; AVX512VL-32-NEXT: popl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512VL-32-NEXT: retl
|
||||
;
|
||||
; AVX512VL-64-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
|
||||
; AVX512VL-64: # %bb.0:
|
||||
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm3, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm3, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i64> %ret
|
||||
}
|
||||
|
||||
define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
|
||||
; AVX512VL-32-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
|
||||
; AVX512VL-32: # %bb.0:
|
||||
; AVX512VL-32-NEXT: pushl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
|
||||
; AVX512VL-32-NEXT: movl %esp, %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; AVX512VL-32-NEXT: pushl %ebx
|
||||
; AVX512VL-32-NEXT: pushl %edi
|
||||
; AVX512VL-32-NEXT: pushl %esi
|
||||
; AVX512VL-32-NEXT: andl $-8, %esp
|
||||
; AVX512VL-32-NEXT: subl $80, %esp
|
||||
; AVX512VL-32-NEXT: .cfi_offset %esi, -20
|
||||
; AVX512VL-32-NEXT: .cfi_offset %edi, -16
|
||||
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
|
||||
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
|
||||
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, %edi
|
||||
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm5
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm5, %xmm5 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm5, %xmm4, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: movl $0, %eax
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: movl %eax, %esi
|
||||
; AVX512VL-32-NEXT: xorl %edx, %edx
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: setae %dl
|
||||
; AVX512VL-32-NEXT: shll $31, %edx
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
|
||||
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
|
||||
; AVX512VL-32-NEXT: xorl %ecx, %ecx
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: setb %al
|
||||
; AVX512VL-32-NEXT: kmovw %eax, %k1
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm5
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm5, %xmm5 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm5, %xmm4, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm4, (%esp)
|
||||
; AVX512VL-32-NEXT: flds (%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll (%esp)
|
||||
; AVX512VL-32-NEXT: setae %cl
|
||||
; AVX512VL-32-NEXT: shll $31, %ecx
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
|
||||
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
|
||||
; AVX512VL-32-NEXT: xorl %eax, %eax
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
|
||||
; AVX512VL-32-NEXT: setb %bl
|
||||
; AVX512VL-32-NEXT: kmovw %ebx, %k1
|
||||
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm4, %xmm4 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm0
|
||||
; AVX512VL-32-NEXT: setb %bl
|
||||
; AVX512VL-32-NEXT: kmovw %ebx, %k1
|
||||
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
|
||||
; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 4-byte Folded Reload
|
||||
; AVX512VL-32-NEXT: setae %al
|
||||
; AVX512VL-32-NEXT: movzbl %al, %eax
|
||||
; AVX512VL-32-NEXT: shll $31, %eax
|
||||
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
|
||||
; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3
|
||||
; AVX512VL-32-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 4-byte Folded Reload
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1
|
||||
; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; AVX512VL-32-NEXT: leal -12(%ebp), %esp
|
||||
; AVX512VL-32-NEXT: popl %esi
|
||||
; AVX512VL-32-NEXT: popl %edi
|
||||
; AVX512VL-32-NEXT: popl %ebx
|
||||
; AVX512VL-32-NEXT: popl %ebp
|
||||
; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
|
||||
; AVX512VL-32-NEXT: retl
|
||||
;
|
||||
; AVX512VL-64-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
|
||||
; AVX512VL-64: # %bb.0:
|
||||
; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm3, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
|
||||
; AVX512VL-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512VL-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm3, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm3
|
||||
; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512VL-64-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
|
||||
; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; AVX512VL-64-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i64> %ret
|
||||
}
|
||||
|
||||
define <8 x i32> @strict_vector_fptosi_v8f64_to_v8i32(<8 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f64_to_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double> %a,
                                          metadata !"fpexcept.strict")
  ret <8 x i32> %ret
}

define <8 x i32> @strict_vector_fptoui_v8f64_to_v8i32(<8 x double> %a) #0 {
|
||||
; CHECK-LABEL: strict_vector_fptoui_v8f64_to_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm2, %eax
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm1, %ecx
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm2
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm2, %eax
|
||||
; CHECK-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm2, %eax
|
||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm2, %eax
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm2
|
||||
; CHECK-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm0, %eax
|
||||
; CHECK-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; CHECK-NEXT: vcvttsd2usi %xmm0, %eax
|
||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
|
||||
; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i32> %ret
|
||||
}
|
||||
|
||||
define <8 x i16> @strict_vector_fptosi_v8f64_to_v8i16(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double> %a,
                                          metadata !"fpexcept.strict")
  ret <8 x i16> %ret
}

define <8 x i16> @strict_vector_fptoui_v8f64_to_v8i16(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double> %a,
                                          metadata !"fpexcept.strict")
  ret <8 x i16> %ret
}

define <8 x i8> @strict_vector_fptosi_v8f64_to_v8i8(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f64(<8 x double> %a,
                                          metadata !"fpexcept.strict")
  ret <8 x i8> %ret
}

define <8 x i8> @strict_vector_fptoui_v8f64_to_v8i8(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f64(<8 x double> %a,
                                          metadata !"fpexcept.strict")
  ret <8 x i8> %ret
}

define <8 x i1> @strict_vector_fptosi_v8f64_to_v8i1(<8 x double> %a) #0 {
|
||||
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i1:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i1:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i1> %ret
|
||||
}
|
||||
|
||||
define <8 x i1> @strict_vector_fptoui_v8f64_to_v8i1(<8 x double> %a) #0 {
|
||||
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i1:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i1:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <8 x i1> %ret
|
||||
}
|
||||
|
||||
define <16 x i32> @strict_vector_fptosi_v16f32_to_v16i32(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f32(<16 x float> %a,
                                          metadata !"fpexcept.strict")
  ret <16 x i32> %ret
}

define <16 x i32> @strict_vector_fptoui_v16f32_to_v16i32(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f32(<16 x float> %a,
                                          metadata !"fpexcept.strict")
  ret <16 x i32> %ret
}

define <16 x i16> @strict_vector_fptosi_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float> %a,
                                          metadata !"fpexcept.strict")
  ret <16 x i16> %ret
}

define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,
                                          metadata !"fpexcept.strict")
  ret <16 x i16> %ret
}

define <16 x i8> @strict_vector_fptosi_v16f32_to_v16i8(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: vpmovdb %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float> %a,
                                          metadata !"fpexcept.strict")
  ret <16 x i8> %ret
}

define <16 x i8> @strict_vector_fptoui_v16f32_to_v16i8(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: vpmovdb %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float> %a,
                                          metadata !"fpexcept.strict")
  ret <16 x i8> %ret
}

define <16 x i1> @strict_vector_fptosi_v16f32_to_v16i1(<16 x float> %a) #0 {
|
||||
; AVX512VL-LABEL: strict_vector_fptosi_v16f32_to_v16i1:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptosi_v16f32_to_v16i1:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <16 x i1> %ret
|
||||
}
|
||||
|
||||
define <16 x i1> @strict_vector_fptoui_v16f32_to_v16i1(<16 x float> %a) #0 {
|
||||
; AVX512VL-LABEL: strict_vector_fptoui_v16f32_to_v16i1:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||
;
|
||||
; AVX512DQ-LABEL: strict_vector_fptoui_v16f32_to_v16i1:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vcvttps2dq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: ret{{[l|q]}}
|
||||
%ret = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float> %a,
|
||||
metadata !"fpexcept.strict")
|
||||
ret <16 x i1> %ret
|
||||
}


attributes #0 = { strictfp }
@@ -3922,29 +3922,12 @@ entry:
|
|||
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() #0 {
|
||||
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm1
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm2
|
||||
; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: cvttps2dq {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttps2dq {{.*}}(%rip), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(
|
||||
|
@@ -4065,20 +4048,27 @@ define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() #0 {
|
|||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vcvttss2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f32:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
||||
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(
|
||||
<4 x float><float 42.0, float 43.0,
|
||||
|
@@ -4108,19 +4098,12 @@ entry:
|
|||
define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 {
|
||||
; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm1
|
||||
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttpd2dqx {{.*}}(%rip), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(
|
||||
|
@@ -4162,29 +4145,14 @@ entry:
|
|||
define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() #0 {
|
||||
; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm1
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm2
|
||||
; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movd %eax, %xmm0
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm1
|
||||
; CHECK-NEXT: cvttpd2dq {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax
|
||||
; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: vcvttpd2dqy {{.*}}(%rip), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(
|
||||
|
@@ -4221,14 +4189,31 @@ define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() #0 {
|
|||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX-NEXT: vmovq %rax, %xmm0
|
||||
; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX-NEXT: vmovq %rax, %xmm1
|
||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm0
|
||||
; AVX1-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX1-NEXT: vmovq %rax, %xmm1
|
||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f64:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
|
||||
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(
|
||||
<2 x double><double 42.1, double 42.2>,
|
||||
|
@@ -4305,20 +4290,27 @@ define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() #0 {
|
|||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vcvttsd2si {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f64:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
|
||||
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(
|
||||
<4 x double><double 42.1, double 42.2,
|
||||
|
@@ -4643,20 +4635,27 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
|
|||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vcvttss2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f32:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1]
|
||||
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(
|
||||
<4 x float><float 42.0, float 43.0,
|
||||
|
@@ -4865,14 +4864,22 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
|
|||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f64:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1]
|
||||
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(
|
||||
<2 x double><double 42.1, double 42.2>,
|
||||
|
@@ -4981,20 +4988,27 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
|
|||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
||||
; AVX512: # %bb.0: # %entry
|
||||
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
||||
; AVX512F: # %bb.0: # %entry
|
||||
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vcvttsd2usi {{.*}}(%rip), %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm2
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
||||
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f64:
|
||||
; AVX512DQ: # %bb.0: # %entry
|
||||
; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1]
|
||||
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
entry:
|
||||
%result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(
|
||||
<4 x double><double 42.1, double 42.2,
|
||||