forked from OSchip/llvm-project
AVX: Move the ZEXT/ANYEXT DAGCombine optimizations into the custom lowering of these operations. The existing test cases still cover all of these lowerings/optimizations. The only change is that any_extend no longer needs to zero a register, because it no longer follows exactly the same code path as zero_extend.
llvm-svn: 171178
This commit is contained in:
parent
bf18725d11
commit
3da9ac72fa
|
@ -1125,8 +1125,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
|
||||
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
|
||||
|
||||
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
|
||||
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
|
||||
|
@ -8292,12 +8296,70 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
|
|||
}
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
|
||||
/// Shared helper that widens a 128-bit integer vector to a 256-bit one for
/// the two type pairs v8i16 -> v8i32 and v4i32 -> v4i64.
///
/// \param Op        The extend node being lowered; its opcode decides whether
///                  the new high bits are zero (ISD::ZERO_EXTEND) or undefined
///                  (any other opcode).
/// \param DAG       The selection DAG used to create the replacement nodes.
/// \param Subtarget Queried for hasInt256() and passed on to getZeroVector().
/// \returns The replacement value, or an empty SDValue when the type pair is
///          not one of the two handled here.
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
EVT VT = Op->getValueType(0);
|
||||
SDValue In = Op->getOperand(0);
|
||||
EVT InVT = In.getValueType();
|
||||
DebugLoc dl = Op->getDebugLoc();
|
||||
|
||||
// Optimize vectors in AVX mode:
|
||||
//
|
||||
// v8i16 -> v8i32
|
||||
// Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
|
||||
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
|
||||
// Concat upper and lower parts.
|
||||
|
||||
//
|
||||
// v4i32 -> v4i64
|
||||
// Use vpunpckldq for 2 lower elements v4i32 -> v2i64.
|
||||
// Use vpunpckhdq for 2 upper elements v4i32 -> v2i64.
|
||||
// Concat upper and lower parts.
|
||||
|
||||
//
|
||||
|
||||
|
||||
// Bail out unless this is exactly v8i16 -> v8i32 or v4i32 -> v4i64.
if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
|
||||
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
|
||||
return SDValue();
|
||||
|
||||
// With Int256 support the whole extension is emitted as one VZEXT_MOVL node.
// NOTE(review): this path is taken for every opcode, including any-extend.
if (Subtarget->hasInt256())
|
||||
return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
|
||||
|
||||
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
|
||||
SDValue Undef = DAG.getUNDEF(InVT);
|
||||
// Only a zero-extend needs real zeros interleaved with the input elements;
// for any other opcode the second unpack operand is left undefined.
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
|
||||
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
|
||||
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
|
||||
|
||||
// HVT has the result's element type but only half as many elements.
EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
|
||||
VT.getVectorNumElements()/2);
|
||||
|
||||
// Reinterpret each unpacked half as the wider element type.
OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
|
||||
OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
|
||||
|
||||
// Join the low and high halves into the full-width result.
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
|
||||
}
|
||||
|
||||
/// Custom lowering entry point for ISD::ANY_EXTEND.
///
/// When the subtarget reports hasFp256(), the node is handed to
/// LowerAVXExtend and a non-empty result is returned as-is. In every other
/// case an empty SDValue is returned.
/// NOTE(review): an empty SDValue presumably tells the caller that no custom
/// lowering was produced - confirm against the legalizer.
SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
if (Subtarget->hasFp256()) {
|
||||
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
|
||||
// LowerAVXExtend returns an empty SDValue for type pairs it does not handle.
if (Res.getNode())
|
||||
return Res;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue In = Op.getOperand(0);
|
||||
EVT SVT = In.getValueType();
|
||||
|
||||
if (Subtarget->hasFp256()) {
|
||||
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
|
||||
if (Res.getNode())
|
||||
return Res;
|
||||
}
|
||||
|
||||
if (!VT.is256BitVector() || !SVT.is128BitVector() ||
|
||||
VT.getVectorNumElements() != SVT.getVectorNumElements())
|
||||
return SDValue();
|
||||
|
@ -11849,7 +11911,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
|
||||
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
||||
case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
|
||||
case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
|
||||
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
|
||||
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
|
||||
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
|
||||
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
||||
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
|
||||
case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
|
||||
|
@ -11859,7 +11923,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
|
||||
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
|
||||
case ISD::VASTART: return LowerVASTART(Op, DAG);
|
||||
|
@ -16856,7 +16919,6 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||
DebugLoc dl = N->getDebugLoc();
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
EVT OpVT = N0.getValueType();
|
||||
|
||||
if (N0.getOpcode() == ISD::AND &&
|
||||
N0.hasOneUse() &&
|
||||
|
@ -16879,43 +16941,6 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return R;
|
||||
}
|
||||
|
||||
// Optimize vectors in AVX mode:
|
||||
//
|
||||
// v8i16 -> v8i32
|
||||
// Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
|
||||
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
|
||||
// Concat upper and lower parts.
|
||||
//
|
||||
// v4i32 -> v4i64
|
||||
// Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
|
||||
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
|
||||
// Concat upper and lower parts.
|
||||
//
|
||||
if (!DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
if (!Subtarget->hasFp256())
|
||||
return SDValue();
|
||||
|
||||
if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
|
||||
((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
|
||||
|
||||
if (Subtarget->hasInt256())
|
||||
return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
|
||||
|
||||
SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
|
||||
SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec);
|
||||
SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec);
|
||||
|
||||
EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
|
||||
VT.getVectorNumElements()/2);
|
||||
|
||||
OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
|
||||
OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -813,7 +813,9 @@ namespace llvm {
|
|||
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -824,7 +826,6 @@ namespace llvm {
|
|||
DebugLoc dl, SelectionDAG &DAG) const;
|
||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -18,11 +18,10 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
|
|||
ret <4 x i64>%B
|
||||
}
|
||||
|
||||
|
||||
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
|
||||
;CHECK: zext_8i8_to_8i32
|
||||
;CHECK: vpunpckhwd
|
||||
;CHECK: vpunpcklwd
|
||||
;CHECK: vpmovzxwd
|
||||
;CHECK: vinsertf128
|
||||
;CHECK: ret
|
||||
%t = zext <8 x i8> %z to <8 x i32>
|
||||
|
|
Loading…
Reference in New Issue