forked from OSchip/llvm-project
[AVX-512] Add support for lowering (v2i64 (fp_to_sint (v2f32))) to vcvttps2qq and (v2i64 (fp_to_uint (v2f32))) to vcvttps2uqq when AVX512DQ and AVX512VL are available.
llvm-svn: 289335
This commit is contained in:
parent
8e288e0b68
commit
18b57da491
|
@ -1265,6 +1265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
if (Subtarget.hasVLX()) {
|
||||
// Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
|
||||
}
|
||||
}
|
||||
if (Subtarget.hasVLX()) {
|
||||
|
@ -15233,11 +15235,28 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) const {
|
||||
assert(!Op.getSimpleValueType().isVector());
|
||||
|
||||
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
|
||||
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
||||
if (VT.isVector()) {
|
||||
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
|
||||
SDValue Src = Op.getOperand(0);
|
||||
SDLoc dl(Op);
|
||||
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
|
||||
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI,
|
||||
dl, VT,
|
||||
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
|
||||
DAG.getUNDEF(MVT::v2f32)));
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
assert(!VT.isVector());
|
||||
|
||||
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
|
||||
IsSigned, /*IsReplace=*/ false);
|
||||
SDValue FIST = Vals.first, StackSlot = Vals.second;
|
||||
|
@ -15247,8 +15266,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
|
|||
|
||||
if (StackSlot.getNode())
|
||||
// Load the result.
|
||||
return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
|
||||
MachinePointerInfo());
|
||||
return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());
|
||||
|
||||
// The node is the result.
|
||||
return FIST;
|
||||
|
@ -22780,7 +22798,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::SIGN_EXTEND_VECTOR_INREG:
|
||||
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
|
||||
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, Subtarget, DAG);
|
||||
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
|
||||
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
|
||||
case ISD::FABS:
|
||||
|
|
|
@ -1137,7 +1137,8 @@ namespace llvm {
|
|||
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_INT(SDValue Op, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -886,15 +886,50 @@ define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
|
|||
; SSE-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: fptosi_2f32_to_2i64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX-NEXT: vmovq %rax, %xmm1
|
||||
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX-NEXT: vmovq %rax, %xmm0
|
||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX-NEXT: retq
|
||||
; VEX-LABEL: fptosi_2f32_to_2i64:
|
||||
; VEX: # BB#0:
|
||||
; VEX-NEXT: vcvttss2si %xmm0, %rax
|
||||
; VEX-NEXT: vmovq %rax, %xmm1
|
||||
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; VEX-NEXT: vcvttss2si %xmm0, %rax
|
||||
; VEX-NEXT: vmovq %rax, %xmm0
|
||||
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: fptosi_2f32_to_2i64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptosi_2f32_to_2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
|
||||
; AVX512VLDQ: # BB#0:
|
||||
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
%shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%cvt = fptosi <2 x float> %shuf to <2 x i64>
|
||||
ret <2 x i64> %cvt
|
||||
|
@ -1384,15 +1419,40 @@ define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
|
|||
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
||||
; VEX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: fptoui_2f32_to_2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: fptoui_2f32_to_2i64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512F-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fptoui_2f32_to_2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
||||
; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
|
||||
; AVX512VLDQ: # BB#0:
|
||||
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
%shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%cvt = fptoui <2 x float> %shuf to <2 x i64>
|
||||
ret <2 x i64> %cvt
|
||||
|
|
Loading…
Reference in New Issue