forked from OSchip/llvm-project
Optimize the vector UINT_TO_FP, SINT_TO_FP and FP_TO_SINT operations where the integer type is i8 (commonly used in graphics).
llvm-svn: 155397
This commit is contained in:
parent
9a0914753a
commit
3f8acfc3c4
|
@ -1221,7 +1221,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
setTargetDAGCombine(ISD::TRUNCATE);
|
||||
setTargetDAGCombine(ISD::UINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::FP_TO_SINT);
|
||||
if (Subtarget->is64Bit())
|
||||
setTargetDAGCombine(ISD::MUL);
|
||||
if (Subtarget->hasBMI())
|
||||
|
@ -14985,9 +14987,43 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86TargetLowering *XTLI) {
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
EVT InVT = Op0->getValueType(0);
|
||||
if (!InVT.isSimple())
|
||||
return SDValue();
|
||||
|
||||
// UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32))
|
||||
MVT SrcVT = InVT.getSimpleVT();
|
||||
if (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i8) {
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
MVT DstVT = (SrcVT.getVectorNumElements() == 4 ? MVT::v4i32 : MVT::v8i32);
|
||||
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
|
||||
// Notice that we use SINT_TO_FP because we know that the high bits
|
||||
// are zero and SINT_TO_FP is better supported by the hardware.
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86TargetLowering *XTLI) {
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
EVT InVT = Op0->getValueType(0);
|
||||
if (!InVT.isSimple())
|
||||
return SDValue();
|
||||
|
||||
// SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
|
||||
MVT SrcVT = InVT.getSimpleVT();
|
||||
if (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i8) {
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
MVT DstVT = (SrcVT.getVectorNumElements() == 4 ? MVT::v4i32 : MVT::v8i32);
|
||||
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
|
||||
}
|
||||
|
||||
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
|
||||
// a 32-bit target where SSE doesn't support i64->FP operations.
|
||||
if (Op0.getOpcode() == ISD::LOAD) {
|
||||
|
@ -15006,6 +15042,24 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86TargetLowering *XTLI) {
|
||||
EVT InVT = N->getValueType(0);
|
||||
if (!InVT.isSimple())
|
||||
return SDValue();
|
||||
|
||||
// v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT()
|
||||
MVT VT = InVT.getSimpleVT();
|
||||
if (VT == MVT::v8i8 || VT == MVT::v4i8) {
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
MVT DstVT = (VT.getVectorNumElements() == 4 ? MVT::v4i32 : MVT::v8i32);
|
||||
SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0));
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT, I);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
|
||||
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
X86TargetLowering::DAGCombinerInfo &DCI) {
|
||||
|
@ -15142,7 +15196,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, this);
|
||||
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
|
||||
case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG, this);
|
||||
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
|
||||
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
|
||||
case X86ISD::FXOR:
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
;CHECK: foo1_8
|
||||
;CHECK: vcvtdq2ps
|
||||
;CHECK: ret
|
||||
define <8 x float> @foo1_8(<8 x i8> %src) {
|
||||
%res = sitofp <8 x i8> %src to <8 x float>
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
;CHECK: foo1_4
|
||||
;CHECK: vcvtdq2ps
|
||||
;CHECK: ret
|
||||
define <4 x float> @foo1_4(<4 x i8> %src) {
|
||||
%res = sitofp <4 x i8> %src to <4 x float>
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
;CHECK: foo2_8
|
||||
;CHECK: vcvtdq2ps
|
||||
;CHECK: ret
|
||||
define <8 x float> @foo2_8(<8 x i8> %src) {
|
||||
%res = uitofp <8 x i8> %src to <8 x float>
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
;CHECK: foo2_4
|
||||
;CHECK: vcvtdq2ps
|
||||
;CHECK: ret
|
||||
define <4 x float> @foo2_4(<4 x i8> %src) {
|
||||
%res = uitofp <4 x i8> %src to <4 x float>
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
;CHECK: foo3_8
|
||||
;CHECK: vcvttps2dq
|
||||
;CHECK: ret
|
||||
define <8 x i8> @foo3_8(<8 x float> %src) {
|
||||
%res = fptosi <8 x float> %src to <8 x i8>
|
||||
ret <8 x i8> %res
|
||||
}
|
||||
;CHECK: foo3_4
|
||||
;CHECK: vcvttps2dq
|
||||
;CHECK: ret
|
||||
define <4 x i8> @foo3_4(<4 x float> %src) {
|
||||
%res = fptosi <4 x float> %src to <4 x i8>
|
||||
ret <4 x i8> %res
|
||||
}
|
||||
|
Loading…
Reference in New Issue