diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index daba879c7fce..2302697af2ee 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -65,12 +65,6 @@ using namespace llvm;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 
-cl::opt<bool> ExperimentalVectorWideningLegalization(
-    "x86-experimental-vector-widening-legalization", cl::init(true),
-    cl::desc("Enable an experimental vector type legalization through widening "
-             "rather than promotion."),
-    cl::Hidden);
-
 static cl::opt<int> ExperimentalPrefLoopAlignment(
     "x86-experimental-pref-loop-alignment", cl::init(4),
     cl::desc(
@@ -857,12 +851,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::UREM, VT, Custom);
     }
 
-    if (!ExperimentalVectorWideningLegalization) {
-      setOperationAction(ISD::MUL, MVT::v2i16, Custom);
-      setOperationAction(ISD::MUL, MVT::v2i32, Custom);
-      setOperationAction(ISD::MUL, MVT::v4i16, Custom);
-    }
-
     setOperationAction(ISD::MUL, MVT::v2i8, Custom);
     setOperationAction(ISD::MUL, MVT::v4i8, Custom);
     setOperationAction(ISD::MUL, MVT::v8i8, Custom);
@@ -901,29 +889,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
     setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
 
-    if (!ExperimentalVectorWideningLegalization) {
-      // Use widening instead of promotion.
-      for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
-                       MVT::v4i16, MVT::v2i16 }) {
-        setOperationAction(ISD::UADDSAT, VT, Custom);
-        setOperationAction(ISD::SADDSAT, VT, Custom);
-        setOperationAction(ISD::USUBSAT, VT, Custom);
-        setOperationAction(ISD::SSUBSAT, VT, Custom);
-      }
-    }
-
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
 
-    // Provide custom widening for v2f32 setcc. This is really for VLX when
-    // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to
-    // type legalization changing the result type to v4i1 during widening.
-    // It works fine for SSE2 and is probably faster so no need to qualify with
-    // VLX support.
-    if (!ExperimentalVectorWideningLegalization)
-      setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
-
     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::SETCC, VT, Custom);
       setOperationAction(ISD::CTPOP, VT, Custom);
@@ -943,21 +912,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }
 
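With the flag gone, getPreferredVectorAction (changed later in this patch) always answers TypeWidenVector for small integer vectors, so the promotion-era special cases below can be deleted. As an illustrative aside, here is a minimal standalone sketch of what "widening" means for an illegal type like v2i32 — keep the element type and double the lane count until the vector fills a legal register; widenToLegal is a hypothetical helper, not an LLVM API:

#include <cstdio>

struct VecTy { unsigned EltBits, NumElts; };

// Models the effect of TypeWidenVector: pad with undef lanes until the
// vector occupies a full (here 128-bit) register.
static VecTy widenToLegal(VecTy VT, unsigned LegalBits = 128) {
  while (VT.EltBits * VT.NumElts < LegalBits)
    VT.NumElts *= 2;
  return VT;
}

int main() {
  VecTy V = widenToLegal({32, 2});                 // v2i32
  std::printf("v%ui%u\n", V.NumElts, V.EltBits);   // prints v4i32
}
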
-    // We support custom legalizing of sext and anyext loads for specific
-    // memory vector types which we can load as a scalar (or sequence of
-    // scalars) and extend in-register to a legal 128-bit vector type. For sext
-    // loads these must work with a single scalar load.
-    if (!ExperimentalVectorWideningLegalization) {
-      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
-        setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
-        setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
-        setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
-        setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
-        setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
-        setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
-      }
-    }
-
     for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -1031,18 +985,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
 
-    if (ExperimentalVectorWideningLegalization) {
-      setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
 
-      setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
-      setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
-    } else {
-      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
-    }
+    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
 
     // In the customized shift lowering, the legal v4i32/v2i64 cases
     // in AVX2 will be recognized.
@@ -1109,22 +1059,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
     }
 
-    if (!ExperimentalVectorWideningLegalization) {
-      // Avoid narrow result types when widening. The legal types are listed
-      // in the next loop.
-      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
-        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
-        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
-        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
-      }
-    }
-
     // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
     for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
       setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
       setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
-      if (!ExperimentalVectorWideningLegalization)
-        setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
       setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
       setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
       setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
@@ -1476,12 +1414,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
 
-    if (ExperimentalVectorWideningLegalization) {
-      // Need to custom widen this if we don't have AVX512BW.
-      setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
-      setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
-      setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
-    }
+    // Need to custom widen this if we don't have AVX512BW.
+    setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
 
     for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
       setOperationAction(ISD::FFLOOR, VT, Legal);
@@ -1985,8 +1921,7 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
   if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
     return TypeSplitVector;
 
-  if (ExperimentalVectorWideningLegalization &&
-      VT.getVectorNumElements() != 1 &&
+  if (VT.getVectorNumElements() != 1 &&
       VT.getVectorElementType() != MVT::i1)
     return TypeWidenVector;
 
@@ -18994,7 +18929,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
 
   // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
   if (InVT == MVT::v8i8) {
-    if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+    if (VT != MVT::v8i64)
       return SDValue();
 
     In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
@@ -19325,22 +19260,6 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
     SDValue Lo, Hi;
     std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
 
-    if (!ExperimentalVectorWideningLegalization) {
-      // Without vector widening we need to manually construct X86 specific
-      // nodes and an unpcklqdq.
-      Lo = DAG.getNode(X86ISD::VTRUNC, DL, VT, Lo);
-      Hi = DAG.getNode(X86ISD::VTRUNC, DL, VT, Hi);
-
-      // Manually concat the truncates using a shuffle.
-      unsigned NumElts = VT.getVectorNumElements();
-      SmallVector<int, 16> ShufMask(NumElts);
-      for (unsigned i = 0; i != NumElts / 2; ++i)
-        ShufMask[i] = i;
-      for (unsigned i = NumElts / 2; i != NumElts; ++i)
-        ShufMask[i] = i + (NumElts / 2);
-      return DAG.getVectorShuffle(VT, DL, Lo, Hi, ShufMask);
-    }
-
     EVT LoVT, HiVT;
     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
 
@@ -20656,14 +20575,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
   assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
          "Invalid number of packed elements for source and destination!");
 
-  // This is being called by type legalization because v2i32 is marked custom
-  // for result type legalization for v2f32.
-  if (VTOp0 == MVT::v2i32) {
-    assert(!ExperimentalVectorWideningLegalization &&
-           "Should only get here with promote legalization!");
-    return SDValue();
-  }
-
   // The non-AVX512 code below works under the assumption that source and
   // destination types are the same.
   assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
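The v8i8 extends marked Custom above are legalized with the concat-with-undef idiom used throughout this patch: pad the narrow vector to a legal width with undef lanes, then apply an operation that reads only the low lanes. A scalar model of that idiom, illustrative only (lanes 8..15 stand in for the undef padding):

#include <cstdint>
#include <cstdio>

int main() {
  // v8i8 widened to v16i8: only the low 8 lanes carry defined data.
  int8_t wide[16] = {-1, 2, -3, 4, -5, 6, -7, 8};
  int64_t out[8];
  for (int i = 0; i < 8; ++i)
    out[i] = wide[i];   // the extend never reads the undef upper half
  std::printf("%lld %lld\n", (long long)out[0], (long long)out[2]); // -1 -3
}
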
@@ -21676,7 +21587,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
 
   // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
   if (InVT == MVT::v8i8) {
-    if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
+    if (VT != MVT::v8i64)
       return SDValue();
 
     In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
@@ -21820,14 +21731,13 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
     return SDValue();
   }
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
          "Unexpected VT");
-  if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
-        TargetLowering::TypeWidenVector)
-    return SDValue();
+  assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
+             TargetLowering::TypeWidenVector && "Unexpected type action!");
 
-  MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
-                                StoreVT.getVectorNumElements() * 2);
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
   StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                           DAG.getUNDEF(StoreVT));
 
@@ -21867,11 +21777,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
   LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
   SDLoc dl(Ld);
 
-  EVT MemVT = Ld->getMemoryVT();
-
   // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
   if (RegVT.getVectorElementType() == MVT::i1) {
-    assert(EVT(RegVT) == MemVT && "Expected non-extending load");
+    assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
     assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
     assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
            "Expected AVX512F without AVX512DQI");
@@ -21890,179 +21799,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
     return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
   }
 
-  if (ExperimentalVectorWideningLegalization)
-    return SDValue();
-
-  // Nothing useful we can do without SSE2 shuffles.
-  assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  unsigned RegSz = RegVT.getSizeInBits();
-
-  ISD::LoadExtType Ext = Ld->getExtensionType();
-
-  assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
-         && "Only anyext and sext are currently implemented.");
-  assert(MemVT != RegVT && "Cannot extend to the same type");
-  assert(MemVT.isVector() && "Must load a vector from memory");
-
-  unsigned NumElems = RegVT.getVectorNumElements();
-  unsigned MemSz = MemVT.getSizeInBits();
-  assert(RegSz > MemSz && "Register size must be greater than the mem size");
-
-  if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
-    // The only way in which we have a legal 256-bit vector result but not the
-    // integer 256-bit operations needed to directly lower a sextload is if we
-    // have AVX1 but not AVX2. In that case, we can always emit a sextload to
-    // a 128-bit vector and a normal sign_extend to 256-bits that should get
-    // correctly legalized. We do this late to allow the canonical form of
-    // sextload to persist throughout the rest of the DAG combiner -- it wants
-    // to fold together any extensions it can, and so will fuse a sign_extend
-    // of an sextload into a sextload targeting a wider value.
-    SDValue Load;
-    if (MemSz == 128) {
-      // Just switch this to a normal load.
-      assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
-                                       "it must be a legal 128-bit vector "
-                                       "type!");
-      Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
-                         Ld->getPointerInfo(), Ld->getAlignment(),
-                         Ld->getMemOperand()->getFlags());
-    } else {
-      assert(MemSz < 128 &&
-             "Can't extend a type wider than 128 bits to a 256 bit vector!");
-      // Do an sext load to a 128-bit vector type. We want to use the same
-      // number of elements, but elements half as wide. This will end up being
-      // recursively lowered by this routine, but will succeed as we definitely
-      // have all the necessary features if we're using AVX1.
-      EVT HalfEltVT =
-          EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
-      EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
-      Load =
-          DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
-                         Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
-                         Ld->getMemOperand()->getFlags());
-    }
-
-    // Replace chain users with the new chain.
-    assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
-
-    // Finally, do a normal sign-extend to the desired register.
-    SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT);
-    return DAG.getMergeValues({SExt, Load.getValue(1)}, dl);
-  }
-
-  // All sizes must be a power of two.
-  assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
-         "Non-power-of-two elements are not custom lowered!");
-
-  // Attempt to load the original value using scalar loads.
-  // Find the largest scalar type that divides the total loaded size.
-  MVT SclrLoadTy = MVT::i8;
-  for (MVT Tp : MVT::integer_valuetypes()) {
-    if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
-      SclrLoadTy = Tp;
-    }
-  }
-
-  // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
-  if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
-      (64 <= MemSz))
-    SclrLoadTy = MVT::f64;
-
-  // Calculate the number of scalar loads that we need to perform
-  // in order to load our vector from memory.
-  unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
-
-  assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
-         "Can only lower sext loads with a single scalar load!");
-
-  unsigned loadRegSize = RegSz;
-  if (Ext == ISD::SEXTLOAD && RegSz >= 256)
-    loadRegSize = 128;
-
-  // If we don't have BWI we won't be able to create the shuffle needed for
-  // v8i8->v8i64.
-  if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
-      MemVT == MVT::v8i8)
-    loadRegSize = 128;
-
-  // Represent our vector as a sequence of elements which are the
-  // largest scalar that we can load.
-  EVT LoadUnitVecVT = EVT::getVectorVT(
-      *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
-
-  // Represent the data using the same element type that is stored in
-  // memory. In practice, we ''widen'' MemVT.
-  EVT WideVecVT =
-      EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
-                       loadRegSize / MemVT.getScalarSizeInBits());
-
-  assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
-         "Invalid vector type");
-
-  // We can't shuffle using an illegal type.
-  assert(TLI.isTypeLegal(WideVecVT) &&
-         "We only lower types that form legal widened vector types");
-
-  SmallVector<SDValue, 8> Chains;
-  SDValue Ptr = Ld->getBasePtr();
-  unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8;
-  SDValue Increment = DAG.getConstant(OffsetInc, dl,
-                                      TLI.getPointerTy(DAG.getDataLayout()));
-  SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
-
-  unsigned Offset = 0;
-  for (unsigned i = 0; i < NumLoads; ++i) {
-    unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset);
-
-    // Perform a single load.
-    SDValue ScalarLoad =
-        DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr,
-                    Ld->getPointerInfo().getWithOffset(Offset),
-                    NewAlign, Ld->getMemOperand()->getFlags());
-    Chains.push_back(ScalarLoad.getValue(1));
-    // Create the first element type using SCALAR_TO_VECTOR in order to avoid
-    // another round of DAGCombining.
-    if (i == 0)
-      Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
-    else
-      Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
-                        ScalarLoad, DAG.getIntPtrConstant(i, dl));
-
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
-    Offset += OffsetInc;
-  }
-
-  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
-
-  // Bitcast the loaded value to a vector of the original element type, in
-  // the size of the target vector type.
-  SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
-  unsigned SizeRatio = RegSz / MemSz;
-
-  if (Ext == ISD::SEXTLOAD) {
-    SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG);
-    return DAG.getMergeValues({Sext, TF}, dl);
-  }
-
-  if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
-      MemVT == MVT::v8i8) {
-    SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG);
-    return DAG.getMergeValues({Sext, TF}, dl);
-  }
-
-  // Redistribute the loaded elements into the different locations.
-  SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
-  for (unsigned i = 0; i != NumElems; ++i)
-    ShuffleVec[i * SizeRatio] = i;
-
-  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
-                                       DAG.getUNDEF(WideVecVT), ShuffleVec);
-
-  // Bitcast to the requested type.
-  Shuff = DAG.getBitcast(RegVT, Shuff);
-  return DAG.getMergeValues({Shuff, TF}, dl);
+  return SDValue();
 }
 
 /// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
@@ -27518,12 +27255,13 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
   SDValue Chain = N->getChain();
   SDValue BasePtr = N->getBasePtr();
 
-  if (VT == MVT::v2f32) {
+  if (VT == MVT::v2f32 || VT == MVT::v2i32) {
     assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
     // If the index is v2i64 and we have VLX we can use xmm for data and index.
     if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
-      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
-                        DAG.getUNDEF(MVT::v2f32));
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
       SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
       SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
       SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
           VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
       return SDValue(NewScatter.getNode(), 1);
     }
     return SDValue();
   }
 
-  if (VT == MVT::v2i32) {
-    assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
-    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
-                      DAG.getUNDEF(MVT::v2i32));
-    // If the index is v2i64 and we have VLX we can use xmm for data and index.
-    if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
-      SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
-      SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
-      SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
-          VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
-      return SDValue(NewScatter.getNode(), 1);
-    }
-    // Custom widen all the operands to avoid promotion.
-    EVT NewIndexVT = EVT::getVectorVT(
-        *DAG.getContext(), Index.getValueType().getVectorElementType(), 4);
-    Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
-                        DAG.getUNDEF(Index.getValueType()));
-    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
-                       DAG.getConstant(0, dl, MVT::v2i1));
-    SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
-    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), dl,
-                                Ops, N->getMemOperand(), N->getIndexType());
-  }
-
   MVT IndexVT = Index.getSimpleValueType();
   MVT MaskVT = Mask.getSimpleValueType();
 
@@ -27990,37 +27704,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   }
   case ISD::MUL: {
     EVT VT = N->getValueType(0);
-    assert(VT.isVector() && "Unexpected VT");
-    if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger &&
-        VT.getVectorNumElements() == 2) {
-      // Promote to a pattern that will be turned into PMULUDQ.
-      SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
-                               N->getOperand(0));
-      SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64,
-                               N->getOperand(1));
-      SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1);
-      Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul));
-    } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
-               VT.getVectorElementType() == MVT::i8) {
-      // Pre-promote these to vXi16 to avoid op legalization thinking all 16
-      // elements are needed.
-      MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
-      SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
-      SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
-      SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
-      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
-      unsigned NumConcats = 16 / VT.getVectorNumElements();
-      SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
-      ConcatOps[0] = Res;
-      Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
-      Results.push_back(Res);
-    }
+    assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+           VT.getVectorElementType() == MVT::i8 && "Unexpected VT!");
+    // Pre-promote these to vXi16 to avoid op legalization thinking all 16
+    // elements are needed.
+    MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
+    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));
+    SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));
+    SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1);
+    Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+    unsigned NumConcats = 16 / VT.getVectorNumElements();
+    SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));
+    ConcatOps[0] = Res;
+    Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps);
+    Results.push_back(Res);
    return;
  }
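The vXi16 pre-promotion above is sound because the low 8 bits of a product depend only on the low 8 bits of the factors: computing in 16-bit lanes (what pmullw does) and truncating gives the same wrapped i8 result. An exhaustive standalone check of that identity:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t direct = uint8_t(uint8_t(a) * uint8_t(b)); // product mod 256
      uint16_t wide = uint16_t(uint16_t(a) * uint16_t(b)); // pmullw-style i16
      assert(uint8_t(wide) == direct);                     // truncate matches
    }
  std::printf("i8 mul == trunc(i16 mul) for all inputs\n");
}
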
-  case ISD::UADDSAT:
-  case ISD::SADDSAT:
-  case ISD::USUBSAT:
-  case ISD::SSUBSAT:
   case X86ISD::VPMADDWD:
   case X86ISD::AVG: {
     // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
@@ -28031,6 +27730,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     EVT InVT = N->getOperand(0).getValueType();
     assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
            "Expected a VT that divides into 128 bits.");
+    assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+           "Unexpected type action!");
     unsigned NumConcat = 128 / InVT.getSizeInBits();
 
     EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
@@ -28047,9 +27748,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
 
     SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
-    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
-      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
-                        DAG.getIntPtrConstant(0, dl));
     Results.push_back(Res);
     return;
   }
@@ -28078,26 +27776,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(Hi);
     return;
   }
-  case ISD::SETCC: {
-    // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
-    // setCC result type is v2i1 because type legalization will end up with
-    // a v4i1 setcc plus an extend.
-    assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type");
-    if (N->getOperand(0).getValueType() != MVT::v2f32 ||
-        getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector)
-      return;
-    SDValue UNDEF = DAG.getUNDEF(MVT::v2f32);
-    SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
-                              N->getOperand(0), UNDEF);
-    SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
-                              N->getOperand(1), UNDEF);
-    SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS,
-                              N->getOperand(2));
-    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
-                      DAG.getIntPtrConstant(0, dl));
-    Results.push_back(Res);
-    return;
-  }
   // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
   case X86ISD::FMINC:
   case X86ISD::FMIN:
@@ -28118,7 +27796,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::SREM:
   case ISD::UREM: {
     EVT VT = N->getValueType(0);
-    if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) {
+    if (VT.isVector()) {
+      assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+             "Unexpected type action!");
       // If this RHS is a constant splat vector we can widen this and let
       // division/remainder by constant optimize it.
       // TODO: Can we do something for non-splat?
@@ -28136,17 +27816,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     }
 
-    if (VT == MVT::v2i32) {
-      // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the
-      // v2i64 and unroll later. But then we create i64 scalar ops which
-      // might be slow in 64-bit mode or require a libcall in 32-bit mode.
-      Results.push_back(DAG.UnrollVectorOp(N));
-      return;
-    }
-
-    if (VT.isVector())
-      return;
-
     LLVM_FALLTHROUGH;
   }
   case ISD::SDIVREM:
@@ -28206,54 +27875,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     }
     return;
   }
-  case ISD::SIGN_EXTEND_VECTOR_INREG: {
-    if (ExperimentalVectorWideningLegalization)
-      return;
-
-    EVT VT = N->getValueType(0);
-    SDValue In = N->getOperand(0);
-    EVT InVT = In.getValueType();
-    if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
-        (InVT == MVT::v16i16 || InVT == MVT::v32i8)) {
-      // Custom split this so we can extend i8/i16->i32 invec. This is better
-      // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
-      // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
-      // we allow the sra from the extend to i32 to be shared by the split.
-      EVT ExtractVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
-      MVT ExtendVT = MVT::getVectorVT(MVT::i32,
-                                      VT.getVectorNumElements());
-      In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT,
-                       In, DAG.getIntPtrConstant(0, dl));
-      In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In);
-
-      // Fill a vector with sign bits for each element.
-      SDValue Zero = DAG.getConstant(0, dl, ExtendVT);
-      SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT);
-
-      EVT LoVT, HiVT;
-      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
-      // Create an unpackl and unpackh to interleave the sign bits then bitcast
-      // to vXi64.
-      SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits);
-      Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
-      SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits);
-      Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
-
-      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
-      Results.push_back(Res);
-      return;
-    }
-    return;
-  }
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND: {
     EVT VT = N->getValueType(0);
     SDValue In = N->getOperand(0);
     EVT InVT = In.getValueType();
     if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
-        (InVT == MVT::v4i16 || InVT == MVT::v4i8) &&
-        getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) {
+        (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+      assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector &&
+             "Unexpected type action!");
       assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");
       // Custom split this so we can extend i8/i16->i32 invec. This is better
       // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
       // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
       // we allow the sra from the extend to i32 to be shared by the split.
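The comment above describes the split sign-extend trick: a 32-bit lane is extended to 64 bits by pairing it with its sign word (the sra/pcmpgt-against-zero result) via unpackl/unpackh. A scalar model of one lane, illustrative only, relying on x86's little-endian i64 layout where the low 32 bits come first:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t v = -42;
  uint32_t lo = (uint32_t)v;
  uint32_t signbits = (uint32_t)(v >> 31);   // 0 or 0xFFFFFFFF, per lane
  // unpackl(v, sign): low word is the value, high word is its sign fill.
  int64_t ext = (int64_t)(((uint64_t)signbits << 32) | lo);
  std::printf("%lld\n", (long long)ext);     // prints -42
}
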
@@ -28324,27 +27954,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Src = N->getOperand(0);
     EVT SrcVT = Src.getValueType();
 
-    // Promote these manually to avoid over promotion to v2i64. Type
-    // legalization will revisit the v2i32 operation for more cleanup.
-    if ((VT == MVT::v2i8 || VT == MVT::v2i16) &&
-        getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
-      // AVX512DQ provides instructions that produce a v2i64 result.
-      if (Subtarget.hasDQI())
-        return;
-
-      SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src);
-      Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
-                                                          : ISD::AssertSext,
-                        dl, MVT::v2i32, Res,
-                        DAG.getValueType(VT.getVectorElementType()));
-      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
-      Results.push_back(Res);
-      return;
-    }
-
     if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
-      if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
-        return;
+      assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+             "Unexpected type action!");
 
       // Try to create a 128 bit vector, but don't exceed a 32 bit element.
       unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
@@ -28379,35 +27991,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     assert((IsSigned || Subtarget.hasAVX512()) &&
            "Can only handle signed conversion without AVX512");
     assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
-    bool Widenv2i32 =
-        getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector;
+    assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+           "Unexpected type action!");
     if (Src.getValueType() == MVT::v2f64) {
-      unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
       if (!IsSigned && !Subtarget.hasVLX()) {
-        // If v2i32 is widened, we can defer to the generic legalizer.
-        if (Widenv2i32)
-          return;
-        // Custom widen by doubling to a legal vector with. Isel will
-        // further widen to v8f64.
-        Opc = ISD::FP_TO_UINT;
-        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64,
-                          Src, DAG.getUNDEF(MVT::v2f64));
+        // If we have VLX we can emit a target specific FP_TO_UINT node,
+        // otherwise we can defer to the generic legalizer which will widen
+        // the input as well. This will be further widened during op
+        // legalization to v8i32<-v8f64.
+        return;
       }
+      unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
       SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src);
-      if (!Widenv2i32)
-        Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
-                          DAG.getIntPtrConstant(0, dl));
       Results.push_back(Res);
       return;
     }
-    if (SrcVT == MVT::v2f32 &&
-        getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
-      SDValue Idx = DAG.getIntPtrConstant(0, dl);
-      SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
-                                DAG.getUNDEF(MVT::v2f32));
-      Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
-                                 : ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
-      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
-      Results.push_back(Res);
-      return;
-    }
 
@@ -28417,6 +28012,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     }
 
+    assert(!VT.isVector() && "Vectors should have been handled above!");
+
     if (Subtarget.hasDQI() && VT == MVT::i64 &&
         (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
       assert(!Subtarget.is64Bit() && "i64 should be legal");
@@ -28701,42 +28298,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     }
 
-    if (DstVT.isVector() && SrcVT == MVT::x86mmx &&
-        getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector) {
+    if (DstVT.isVector() && SrcVT == MVT::x86mmx) {
+      assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
+             "Unexpected type action!");
       EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
       SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, WideVT,
                                 N->getOperand(0));
       Results.push_back(Res);
       return;
     }
 
-    if (SrcVT != MVT::f64 ||
-        (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) ||
-        getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector)
-      return;
-
-    unsigned NumElts = DstVT.getVectorNumElements();
-    EVT SVT = DstVT.getVectorElementType();
-    EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
-    SDValue Res;
-    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0));
-    Res = DAG.getBitcast(WiderVT, Res);
-    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
-                      DAG.getIntPtrConstant(0, dl));
-    Results.push_back(Res);
     return;
   }
   case ISD::MGATHER: {
     EVT VT = N->getValueType(0);
-    if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
+    if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&
+        (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
       auto *Gather = cast<MaskedGatherSDNode>(N);
       SDValue Index = Gather->getIndex();
       if (Index.getValueType() != MVT::v2i64)
         return;
+      assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+             "Unexpected type action!");
+      EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
       SDValue Mask = Gather->getMask();
       assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
-      SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
+      SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT,
                                      Gather->getPassThru(),
-                                     DAG.getUNDEF(MVT::v2f32));
+                                     DAG.getUNDEF(VT));
       if (!Subtarget.hasVLX()) {
         // We need to widen the mask, but the instruction will only use 2
        // of its elements. So we can use undef.
@@ -28747,67 +28335,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
                         Gather->getBasePtr(), Index, Gather->getScale() };
       SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-          DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
+          DAG.getVTList(WideVT, Mask.getValueType(), MVT::Other), Ops, dl,
           Gather->getMemoryVT(), Gather->getMemOperand());
       Results.push_back(Res);
       Results.push_back(Res.getValue(2));
       return;
     }
-    if (VT == MVT::v2i32) {
-      auto *Gather = cast<MaskedGatherSDNode>(N);
-      SDValue Index = Gather->getIndex();
-      SDValue Mask = Gather->getMask();
-      assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
-      SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
-                                     Gather->getPassThru(),
-                                     DAG.getUNDEF(MVT::v2i32));
-      // If the index is v2i64 we can use it directly.
-      if (Index.getValueType() == MVT::v2i64 &&
-          (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
-        if (!Subtarget.hasVLX()) {
-          // We need to widen the mask, but the instruction will only use 2
-          // of its elements. So we can use undef.
-          Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
-                             DAG.getUNDEF(MVT::v2i1));
-          Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
-        }
-        SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
-                          Gather->getBasePtr(), Index, Gather->getScale() };
-        SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-            DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl,
-            Gather->getMemoryVT(), Gather->getMemOperand());
-        SDValue Chain = Res.getValue(2);
-        if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
-          Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
-                            DAG.getIntPtrConstant(0, dl));
-        Results.push_back(Res);
-        Results.push_back(Chain);
-        return;
-      }
-      if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) {
-        EVT IndexVT = Index.getValueType();
-        EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(),
-                                          IndexVT.getScalarType(), 4);
-        // Otherwise we need to custom widen everything to avoid promotion.
-        Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index,
-                            DAG.getUNDEF(IndexVT));
-        Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
-                           DAG.getConstant(0, dl, MVT::v2i1));
-        SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
-                          Gather->getBasePtr(), Index, Gather->getScale() };
-        SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other),
-                                          Gather->getMemoryVT(), dl, Ops,
-                                          Gather->getMemOperand(),
-                                          Gather->getIndexType());
-        SDValue Chain = Res.getValue(1);
-        if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector)
-          Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res,
-                            DAG.getIntPtrConstant(0, dl));
-        Results.push_back(Res);
-        Results.push_back(Chain);
-        return;
-      }
-    }
     return;
   }
   case ISD::LOAD: {
@@ -28816,8 +28349,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     // cast since type legalization will try to use an i64 load.
     MVT VT = N->getSimpleValueType(0);
     assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
-    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
-      return;
+    assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
+           "Unexpected type action!");
     if (!ISD::isNON_EXTLoad(N))
       return;
     auto *Ld = cast<LoadSDNode>(N);
@@ -28827,11 +28360,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                              Ld->getPointerInfo(), Ld->getAlignment(),
                              Ld->getMemOperand()->getFlags());
     SDValue Chain = Res.getValue(1);
-    MVT WideVT = MVT::getVectorVT(LdVT, 2);
-    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
-    MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
-                                  VT.getVectorNumElements() * 2);
-    Res = DAG.getBitcast(CastVT, Res);
+    MVT VecVT = MVT::getVectorVT(LdVT, 2);
+    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res);
+    EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
+    Res = DAG.getBitcast(WideVT, Res);
     Results.push_back(Res);
     Results.push_back(Chain);
     return;
   }
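The ISD::LOAD handling just above loads the 64-bit vector as a single scalar, wraps it with SCALAR_TO_VECTOR, and bitcasts it into the low half of the widened register. A host-side model of the same reinterpretation (assumes little-endian layout, as on x86):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint8_t mem[8] = {1, 2, 3, 4, 5, 6, 7, 8};  // a v8i8 in memory
  uint64_t scalar;
  std::memcpy(&scalar, mem, sizeof scalar);         // one i64/f64-sized load
  uint8_t lanes[16] = {0};                          // widened v16i8 image
  std::memcpy(lanes, &scalar, sizeof scalar);       // SCALAR_TO_VECTOR+bitcast
  std::printf("%u %u\n", lanes[0], lanes[7]);       // prints 1 8
}
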
@@ -34407,67 +33939,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     return HAddSub;
   }
 
-  // During Type Legalization, when promoting illegal vector types,
-  // the backend might introduce new shuffle dag nodes and bitcasts.
-  //
-  // This code performs the following transformation:
-  // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
-  //       (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
-  //
-  // We do this only if both the bitcast and the BINOP dag nodes have
-  // one use. Also, perform this transformation only if the new binary
-  // operation is legal. This is to avoid introducing dag nodes that
-  // potentially need to be further expanded (or custom lowered) into a
-  // less optimal sequence of dag nodes.
-  if (!ExperimentalVectorWideningLegalization &&
-      !DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
-      N->getOpcode() == ISD::VECTOR_SHUFFLE &&
-      N->getOperand(0).getOpcode() == ISD::BITCAST &&
-      N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) {
-    SDValue N0 = N->getOperand(0);
-    SDValue N1 = N->getOperand(1);
-
-    SDValue BC0 = N0.getOperand(0);
-    EVT SVT = BC0.getValueType();
-    unsigned Opcode = BC0.getOpcode();
-    unsigned NumElts = VT.getVectorNumElements();
-
-    if (BC0.hasOneUse() && SVT.isVector() &&
-        SVT.getVectorNumElements() * 2 == NumElts &&
-        TLI.isOperationLegal(Opcode, VT)) {
-      bool CanFold = false;
-      switch (Opcode) {
-      default : break;
-      case ISD::ADD:
-      case ISD::SUB:
-      case ISD::MUL:
-        // isOperationLegal lies for integer ops on floating point types.
-        CanFold = VT.isInteger();
-        break;
-      case ISD::FADD:
-      case ISD::FSUB:
-      case ISD::FMUL:
-        // isOperationLegal lies for floating point ops on integer types.
-        CanFold = VT.isFloatingPoint();
-        break;
-      }
-
-      unsigned SVTNumElts = SVT.getVectorNumElements();
-      ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-      for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
-        CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
-      for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
-        CanFold = SVOp->getMaskElt(i) < 0;
-
-      if (CanFold) {
-        SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
-        SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
-        SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
-        return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask());
-      }
-    }
-  }
-
   // Attempt to combine into a vector load/broadcast.
   if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true))
     return LD;
@@ -34563,54 +34034,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
-  // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
-  // FIXME: This can probably go away once we default to widening legalization.
-  if (!ExperimentalVectorWideningLegalization &&
-      Subtarget.hasSSE41() && VT == MVT::v4i32 &&
-      N->getOpcode() == ISD::VECTOR_SHUFFLE &&
-      N->getOperand(0).getOpcode() == ISD::BITCAST &&
-      N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) {
-    SDValue BC = N->getOperand(0);
-    SDValue MULUDQ = BC.getOperand(0);
-    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-    ArrayRef<int> Mask = SVOp->getMask();
-    if (BC.hasOneUse() && MULUDQ.hasOneUse() &&
-        Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) {
-      SDValue Op0 = MULUDQ.getOperand(0);
-      SDValue Op1 = MULUDQ.getOperand(1);
-      if (Op0.getOpcode() == ISD::BITCAST &&
-          Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
-          Op0.getOperand(0).getValueType() == MVT::v4i32) {
-        ShuffleVectorSDNode *SVOp0 =
-          cast<ShuffleVectorSDNode>(Op0.getOperand(0));
-        ArrayRef<int> Mask2 = SVOp0->getMask();
-        if (Mask2[0] == 0 && Mask2[1] == -1 &&
-            Mask2[2] == 1 && Mask2[3] == -1) {
-          Op0 = SVOp0->getOperand(0);
-          Op1 = DAG.getBitcast(MVT::v4i32, Op1);
-          Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask);
-          return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
-        }
-      }
-      if (Op1.getOpcode() == ISD::BITCAST &&
-          Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
-          Op1.getOperand(0).getValueType() == MVT::v4i32) {
-        ShuffleVectorSDNode *SVOp1 =
-          cast<ShuffleVectorSDNode>(Op1.getOperand(0));
-        ArrayRef<int> Mask2 = SVOp1->getMask();
-        if (Mask2[0] == 0 && Mask2[1] == -1 &&
-            Mask2[2] == 1 && Mask2[3] == -1) {
-          Op0 = DAG.getBitcast(MVT::v4i32, Op0);
-          Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask);
-          Op1 = SVOp1->getOperand(0);
-          return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1);
-        }
-      }
-    }
-  }
-
   return SDValue();
 }
 
@@ -36608,7 +36031,7 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
 
   SDLoc DL(ExtElt);
 
-  if (ExperimentalVectorWideningLegalization && VecVT == MVT::v8i8) {
+  if (VecVT == MVT::v8i8) {
     // Pad with undef.
     Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                       DAG.getUNDEF(VecVT));
@@ -37314,8 +36737,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   // Since SKX these selects have a proper lowering.
   if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
       CondVT.getVectorElementType() == MVT::i1 &&
-      (ExperimentalVectorWideningLegalization ||
-       VT.getVectorNumElements() > 4) &&
       (VT.getVectorElementType() == MVT::i8 ||
       VT.getVectorElementType() == MVT::i16)) {
     Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -38192,98 +37613,45 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   if ((NumElts % 2) != 0)
     return SDValue();
 
-  unsigned RegSize = 128;
-  MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
   EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
 
   // Shrink the operands of mul.
   SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
   SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
 
-  if (ExperimentalVectorWideningLegalization ||
-      NumElts >= OpsVT.getVectorNumElements()) {
-    // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
-    // lower part is needed.
-    SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
-    if (Mode == MULU8 || Mode == MULS8)
-      return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
-                         DL, VT, MulLo);
+  // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
+  // lower part is needed.
+  SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
+  if (Mode == MULU8 || Mode == MULS8)
+    return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
+                       DL, VT, MulLo);
-    MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
-    // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
-    // the higher part is also needed.
-    SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
-                                ReducedVT, NewN0, NewN1);
-
-    // Repack the lower part and higher part result of mul into a wider
-    // result.
-    // Generate shuffle functioning as punpcklwd.
-    SmallVector<int, 16> ShuffleMask(NumElts);
-    for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
-      ShuffleMask[2 * i] = i;
-      ShuffleMask[2 * i + 1] = i + NumElts;
-    }
-    SDValue ResLo =
-        DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
-    ResLo = DAG.getBitcast(ResVT, ResLo);
-    // Generate shuffle functioning as punpckhwd.
-    for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
-      ShuffleMask[2 * i] = i + NumElts / 2;
-      ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
-    }
-    SDValue ResHi =
-        DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
-    ResHi = DAG.getBitcast(ResVT, ResHi);
-    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
-  }
-
-  // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
-  // to legalize the mul explicitly because implicit legalization for type
-  // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
-  // instructions which will not exist when we explicitly legalize it by
-  // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
-  // <4 x i16> undef).
-  //
-  // Legalize the operands of mul.
-  // FIXME: We may be able to handle non-concatenated vectors by insertion.
-  unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
-  if ((RegSize % ReducedSizeInBits) != 0)
-    return SDValue();
-
-  SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
-                               DAG.getUNDEF(ReducedVT));
-  Ops[0] = NewN0;
-  NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-  Ops[0] = NewN1;
-  NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
-
-  if (Mode == MULU8 || Mode == MULS8) {
-    // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
-    // part is needed.
-    SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
-
-    // convert the type of mul result to VT.
-    MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
-    SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
-                                            : ISD::SIGN_EXTEND_VECTOR_INREG,
-                              DL, ResVT, Mul);
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
-                       DAG.getIntPtrConstant(0, DL));
-  }
-
-  // Generate the lower and higher part of mul: pmulhw/pmulhuw. For
-  // MULU16/MULS16, both parts are needed.
-  SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
+  MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+  // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
+  // the higher part is also needed.
   SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
-                              OpsVT, NewN0, NewN1);
+                              ReducedVT, NewN0, NewN1);
 
   // Repack the lower part and higher part result of mul into a wider
-  // result. Make sure the type of mul result is VT.
-  MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
-  SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi);
-  Res = DAG.getBitcast(ResVT, Res);
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
-                     DAG.getIntPtrConstant(0, DL));
+  // result.
+  // Generate shuffle functioning as punpcklwd.
+  SmallVector<int, 16> ShuffleMask(NumElts);
+  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+    ShuffleMask[2 * i] = i;
+    ShuffleMask[2 * i + 1] = i + NumElts;
+  }
+  SDValue ResLo =
+      DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+  ResLo = DAG.getBitcast(ResVT, ResLo);
+  // Generate shuffle functioning as punpckhwd.
+  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
+    ShuffleMask[2 * i] = i + NumElts / 2;
+    ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
+  }
+  SDValue ResHi =
+      DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
+  ResHi = DAG.getBitcast(ResVT, ResHi);
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
 }
 
 static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
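The two masks built above interleave MulLo and MulHi so each 32-bit result lane is the (low, high) pair of one product — exactly what punpcklwd/punpckhwd do. Printing them for NumElts = 8 makes the pattern visible:

#include <cstdio>

int main() {
  const unsigned NumElts = 8;
  int Lo[NumElts], Hi[NumElts];
  for (unsigned i = 0; i != NumElts / 2; ++i) {
    Lo[2 * i] = i;                    // lane i of MulLo
    Lo[2 * i + 1] = i + NumElts;      // lane i of MulHi
    Hi[2 * i] = i + NumElts / 2;      // upper-half lanes, same pairing
    Hi[2 * i + 1] = i + NumElts * 3 / 2;
  }
  for (unsigned i = 0; i != NumElts; ++i) std::printf("%d ", Lo[i]);
  std::printf("| ");
  for (unsigned i = 0; i != NumElts; ++i) std::printf("%d ", Hi[i]);
  std::printf("\n");   // 0 8 1 9 2 10 3 11 | 4 12 5 13 6 14 7 15
}
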
@@ -38391,8 +37759,7 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
   // Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
   // Also allow v2i32 if it will be widened.
   MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
-  if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
-        DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
+  if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(WVT))
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
@@ -40649,93 +40016,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
     return Blend;
   }
 
-  if (ExperimentalVectorWideningLegalization)
-    return SDValue();
-
-  if (Mld->getExtensionType() != ISD::EXTLOAD)
-    return SDValue();
-
-  // Resolve extending loads.
-  EVT VT = Mld->getValueType(0);
-  unsigned NumElems = VT.getVectorNumElements();
-  EVT LdVT = Mld->getMemoryVT();
-  SDLoc dl(Mld);
-
-  assert(LdVT != VT && "Cannot extend to the same type");
-  unsigned ToSz = VT.getScalarSizeInBits();
-  unsigned FromSz = LdVT.getScalarSizeInBits();
-  // From/To sizes and ElemCount must be pow of two.
-  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
-          "Unexpected size for extending masked load");
-
-  unsigned SizeRatio = ToSz / FromSz;
-  assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
-
-  // Create a type on which we perform the shuffle.
-  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
-                                   LdVT.getScalarType(), NumElems*SizeRatio);
-  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
-  // Convert PassThru value.
-  SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru());
-  if (!Mld->getPassThru().isUndef()) {
-    SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
-    for (unsigned i = 0; i != NumElems; ++i)
-      ShuffleVec[i] = i * SizeRatio;
-
-    // Can't shuffle using an illegal type.
-    assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
-           "WideVecVT should be legal");
-    WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru,
-                                        DAG.getUNDEF(WideVecVT), ShuffleVec);
-  }
-
-  // Prepare the new mask.
-  SDValue NewMask;
-  SDValue Mask = Mld->getMask();
-  if (Mask.getValueType() == VT) {
-    // Mask and original value have the same type.
-    NewMask = DAG.getBitcast(WideVecVT, Mask);
-    SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
-    for (unsigned i = 0; i != NumElems; ++i)
-      ShuffleVec[i] = i * SizeRatio;
-    for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
-      ShuffleVec[i] = NumElems * SizeRatio;
-    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
-                                   DAG.getConstant(0, dl, WideVecVT),
-                                   ShuffleVec);
-  } else {
-    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
-    unsigned WidenNumElts = NumElems*SizeRatio;
-    unsigned MaskNumElts = VT.getVectorNumElements();
-    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                     WidenNumElts);
-
-    unsigned NumConcat = WidenNumElts / MaskNumElts;
-    SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
-    SmallVector<SDValue, 4> Ops(NumConcat, ZeroVal);
-    Ops[0] = Mask;
-    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
-  }
-
-  SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
-                                     Mld->getBasePtr(), NewMask, WidePassThru,
-                                     Mld->getMemoryVT(), Mld->getMemOperand(),
-                                     ISD::NON_EXTLOAD);
-
-  SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
-  SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
-  for (unsigned i = 0; i != NumElems; ++i)
-    ShuffleVec[i * SizeRatio] = i;
-
-  // Can't shuffle using an illegal type.
-  assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
-         "WideVecVT should be legal");
-  SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
-                                   DAG.getUNDEF(WideVecVT), ShuffleVec);
-  SlicedVec = DAG.getBitcast(VT, SlicedVec);
-
-  return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
+  return SDValue();
 }
 
 /// If exactly one element of the mask is set for a non-truncating masked store,
@@ -40773,111 +40054,34 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   EVT VT = Mst->getValue().getValueType();
-  EVT StVT = Mst->getMemoryVT();
   SDLoc dl(Mst);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
-  if (!Mst->isTruncatingStore()) {
-    if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
-      return ScalarStore;
-
-    // If the mask value has been legalized to a non-boolean vector, try to
-    // simplify ops leading up to it. We only demand the MSB of each lane.
-    SDValue Mask = Mst->getMask();
-    if (Mask.getScalarValueSizeInBits() != 1) {
-      APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
-      if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
-        return SDValue(N, 0);
-    }
-
-    SDValue Value = Mst->getValue();
-    if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
-        TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
-                              Mst->getMemoryVT())) {
-      return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
-                                Mst->getBasePtr(), Mask,
-                                Mst->getMemoryVT(), Mst->getMemOperand(), true);
-    }
-
-    return SDValue();
-  }
-
-  if (ExperimentalVectorWideningLegalization)
+  if (Mst->isTruncatingStore())
     return SDValue();
 
-  // Resolve truncating stores.
-  unsigned NumElems = VT.getVectorNumElements();
+  if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+    return ScalarStore;
 
-  assert(StVT != VT && "Cannot truncate to the same type");
-  unsigned FromSz = VT.getScalarSizeInBits();
-  unsigned ToSz = StVT.getScalarSizeInBits();
-
-  // The truncating store is legal in some cases. For example
-  // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
-  // are designated for truncate store.
-  // In this case we don't need any further transformations.
-  if (TLI.isTruncStoreLegal(VT, StVT))
-    return SDValue();
-
-  // From/To sizes and ElemCount must be pow of two.
-  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
-          "Unexpected size for truncating masked store");
-  // We are going to use the original vector elt for storing.
-  // Accumulated smaller vector elements must be a multiple of the store size.
-  assert (((NumElems * FromSz) % ToSz) == 0 &&
-          "Unexpected ratio for truncating masked store");
-
-  unsigned SizeRatio = FromSz / ToSz;
-  assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
-
-  // Create a type on which we perform the shuffle.
-  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
-                                   StVT.getScalarType(), NumElems*SizeRatio);
-
-  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
-  SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
-  SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
-  for (unsigned i = 0; i != NumElems; ++i)
-    ShuffleVec[i] = i * SizeRatio;
-
-  // Can't shuffle using an illegal type.
-  assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
-         "WideVecVT should be legal");
-
-  SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
-                                              DAG.getUNDEF(WideVecVT),
-                                              ShuffleVec);
-
-  SDValue NewMask;
+  // If the mask value has been legalized to a non-boolean vector, try to
+  // simplify ops leading up to it. We only demand the MSB of each lane.
   SDValue Mask = Mst->getMask();
-  if (Mask.getValueType() == VT) {
-    // Mask and original value have the same type.
-    NewMask = DAG.getBitcast(WideVecVT, Mask);
-    for (unsigned i = 0; i != NumElems; ++i)
-      ShuffleVec[i] = i * SizeRatio;
-    for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
-      ShuffleVec[i] = NumElems*SizeRatio;
-    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
-                                   DAG.getConstant(0, dl, WideVecVT),
-                                   ShuffleVec);
-  } else {
-    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
-    unsigned WidenNumElts = NumElems*SizeRatio;
-    unsigned MaskNumElts = VT.getVectorNumElements();
-    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                     WidenNumElts);
-
-    unsigned NumConcat = WidenNumElts / MaskNumElts;
-    SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
-    SmallVector<SDValue, 4> Ops(NumConcat, ZeroVal);
-    Ops[0] = Mask;
-    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+  if (Mask.getScalarValueSizeInBits() != 1) {
+    APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+    if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+      return SDValue(N, 0);
   }
 
-  return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
-                            Mst->getBasePtr(), NewMask, StVT,
-                            Mst->getMemOperand(), false);
+  SDValue Value = Mst->getValue();
+  if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
+      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+                            Mst->getMemoryVT())) {
+    return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
+                              Mst->getBasePtr(), Mask,
+                              Mst->getMemoryVT(), Mst->getMemOperand(), true);
  }
 
+  return SDValue();
 }
 
 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
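Demanding only APInt::getSignMask per lane in the code above is justified because x86's variable blends and masked moves test just each lane's sign bit. A standalone model of movmskps shows that only the MSB survives:

#include <cstdint>
#include <cstdio>

// Collects the sign bit of each 32-bit lane, like movmskps.
static unsigned movmskps(const uint32_t lanes[4]) {
  unsigned m = 0;
  for (int i = 0; i < 4; ++i)
    m |= (lanes[i] >> 31) << i;   // bit i = MSB of lane i; other bits ignored
  return m;
}

int main() {
  uint32_t mask[4] = {0x80000000u, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x00000001u};
  std::printf("%u\n", movmskps(mask));   // prints 5: lanes 0 and 2 selected
}
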
@@ -41005,41 +40209,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                         MVT::v16i8, St->getMemOperand());
   }
 
-  // Look for a truncating store to a less than 128 bit vector that has been
-  // truncated from an any_extend_inreg from a 128 bit vector with the same
-  // element size. We can use a 64/32/16-bit extractelement and store that.
-  // Disabling this when widening legalization is in effect since the trunc
-  // store would have been unlikely to be created in that case. Only doing this
-  // when truncstore is legal since it would otherwise be decomposed below and
-  // then combined away.
-  if (St->isTruncatingStore() && TLI.isTruncStoreLegal(VT, StVT) &&
-      StoredVal.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
-      StoredVal.getValueType().is128BitVector() &&
-      !ExperimentalVectorWideningLegalization) {
-    EVT OrigVT = StoredVal.getOperand(0).getValueType();
-    if (OrigVT.is128BitVector() &&
-        OrigVT.getVectorElementType() == StVT.getVectorElementType()) {
-      unsigned StoreSize = StVT.getSizeInBits();
-      assert((128 % StoreSize == 0) && "Unexpected store size!");
-      MVT IntVT = MVT::getIntegerVT(StoreSize);
-      MVT CastVT = MVT::getVectorVT(IntVT, 128 / StoreSize);
-      StoredVal = DAG.getBitcast(CastVT, StoredVal.getOperand(0));
-      // Use extract_store for the 64-bit case to support 32-bit targets.
-      if (IntVT == MVT::i64) {
-        SDVTList Tys = DAG.getVTList(MVT::Other);
-        SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
-        return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
-                                       IntVT, St->getMemOperand());
-      }
-
-      // Otherwise just use an extract and store.
-      StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntVT, StoredVal,
-                              DAG.getIntPtrConstant(0, dl));
-      return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
-                          St->getMemOperand());
-    }
-  }
-
   // Optimize trunc store (of multiple scalars) to shuffle and store.
   // First, pack all of the elements in one place. Next, store to memory
   // in fewer chunks.
@@ -41066,85 +40235,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                          St->getMemoryVT(), St->getMemOperand(), DAG);
   }
 
-  if (ExperimentalVectorWideningLegalization)
-    return SDValue();
-
-  unsigned NumElems = VT.getVectorNumElements();
-  assert(StVT != VT && "Cannot truncate to the same type");
-  unsigned FromSz = VT.getScalarSizeInBits();
-  unsigned ToSz = StVT.getScalarSizeInBits();
-
-  // The truncating store is legal in some cases. For example
-  // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
-  // are designated for truncate store.
-  // In this case we don't need any further transformations.
-  if (TLI.isTruncStoreLegalOrCustom(VT, StVT))
-    return SDValue();
-
-  // From, To sizes and ElemCount must be pow of two
-  if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
-  // We are going to use the original vector elt for storing.
-  // Accumulated smaller vector elements must be a multiple of the store size.
-  if (0 != (NumElems * FromSz) % ToSz) return SDValue();
-
-  unsigned SizeRatio = FromSz / ToSz;
-
-  assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
-
-  // Create a type on which we perform the shuffle
-  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
-                                   StVT.getScalarType(), NumElems*SizeRatio);
-
-  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
-
-  SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
-  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
-  for (unsigned i = 0; i != NumElems; ++i)
-    ShuffleVec[i] = i * SizeRatio;
-
-  // Can't shuffle using an illegal type.
-  if (!TLI.isTypeLegal(WideVecVT))
-    return SDValue();
-
-  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
-                                       DAG.getUNDEF(WideVecVT),
-                                       ShuffleVec);
-  // At this point all of the data is stored at the bottom of the
-  // register. We now need to save it to mem.
-
-  // Find the largest store unit
-  MVT StoreType = MVT::i8;
-  for (MVT Tp : MVT::integer_valuetypes()) {
-    if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
-      StoreType = Tp;
-  }
-
-  // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
-  if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
-      (64 <= NumElems * ToSz))
-    StoreType = MVT::f64;
-
-  // Bitcast the original vector into a vector of store-size units
-  EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
-      StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
-  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
-  SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
-  SmallVector<SDValue, 8> Chains;
-  SDValue Ptr = St->getBasePtr();
-
-  // Perform one or more big stores into memory.
-  for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
-    SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                                 StoreType, ShuffWide,
-                                 DAG.getIntPtrConstant(i, dl));
-    SDValue Ch =
-        DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(),
-                     St->getAlignment(), St->getMemOperand()->getFlags());
-    Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl);
-    Chains.push_back(Ch);
-  }
-
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  return SDValue();
 }

 // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
@@ -41160,9 +40251,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
   bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
   bool F64IsLegal = !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
                     Subtarget.hasSSE2();
-  if (((VT.isVector() && !VT.isFloatingPoint() &&
-        !ExperimentalVectorWideningLegalization) ||
-       (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
+  if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) &&
       isa<LoadSDNode>(St->getValue()) &&
      cast<LoadSDNode>(St->getValue())->isSimple() &&
      St->getChain().hasOneUse() && St->isSimple()) {
@@ -41677,9 +40766,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,

   // Only handle vXi16 types that are at least 128-bits unless they will be
   // widened.
-  if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 ||
-      (!ExperimentalVectorWideningLegalization &&
-       VT.getVectorNumElements() < 8))
+  if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
     return SDValue();

   // Input type should be vXi32.
@@ -42793,127 +41880,6 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
                      DAG.getConstant(EltSizeInBits - 1, DL, VT));
 }

-/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
-/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating
-/// with UNDEFs) of the input to vectors of the same size as the target type
-/// which then extends the lowest elements.
-static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
-                                          TargetLowering::DAGCombinerInfo &DCI,
-                                          const X86Subtarget &Subtarget) {
-  if (ExperimentalVectorWideningLegalization)
-    return SDValue();
-
-  unsigned Opcode = N->getOpcode();
-  // TODO - add ANY_EXTEND support.
-  if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
-    return SDValue();
-  if (!DCI.isBeforeLegalizeOps())
-    return SDValue();
-  if (!Subtarget.hasSSE2())
-    return SDValue();
-
-  SDValue N0 = N->getOperand(0);
-  EVT VT = N->getValueType(0);
-  EVT SVT = VT.getScalarType();
-  EVT InVT = N0.getValueType();
-  EVT InSVT = InVT.getScalarType();
-
-  // FIXME: Generic DAGCombiner previously had a bug that would cause a
-  // sign_extend of setcc to sometimes return the original node and tricked it
-  // into thinking CombineTo was used which prevented the target combines from
-  // running.
-  // Earlying out here to avoid regressions like this
-  // (v4i32 (sext (v4i1 (setcc (v4i16)))))
-  // Becomes
-  // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef))))
-  // Type legalized to
-  // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32)))))))
-  // Leading to a packssdw+pmovsxwd
-  // We could write a DAG combine to fix this, but really we shouldn't be
-  // creating sext_invec that's forcing v8i16 into the DAG.
-  if (N0.getOpcode() == ISD::SETCC)
-    return SDValue();
-
-  // Input type must be a vector and we must be extending legal integer types.
-  if (!VT.isVector() || VT.getVectorNumElements() < 2)
-    return SDValue();
-  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
-    return SDValue();
-  if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
-    return SDValue();
-
-  // If the input/output types are both legal then we have at least AVX1 and
-  // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly.
-  if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
-      DAG.getTargetLoweringInfo().isTypeLegal(InVT))
-    return SDValue();
-
-  SDLoc DL(N);
-
-  auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
-    EVT SrcVT = N.getValueType();
-    EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
-                                 Size / SrcVT.getScalarSizeInBits());
-    SmallVector<SDValue, 8> Opnds(Size / SrcVT.getSizeInBits(),
-                                  DAG.getUNDEF(SrcVT));
-    Opnds[0] = N;
-    return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds);
-  };
-
-  // If target-size is less than 128-bits, extend to a type that would extend
-  // to 128 bits, extend that and extract the original target vector.
-  if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) {
-    unsigned Scale = 128 / VT.getSizeInBits();
-    EVT ExVT =
-        EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
-    SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
-    SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex);
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
-                       DAG.getIntPtrConstant(0, DL));
-  }
-
-  // If target-size is 128-bits (or 256-bits on AVX target), then convert to
-  // ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
-  // Also use this if we don't have SSE41 to allow the legalizer do its job.
-  if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
-      (VT.is256BitVector() && Subtarget.hasAVX()) ||
-      (VT.is512BitVector() && Subtarget.useAVX512Regs())) {
-    SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
-    Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
-    return DAG.getNode(Opcode, DL, VT, ExOp);
-  }
-
-  auto SplitAndExtendInReg = [&](unsigned SplitSize) {
-    unsigned NumVecs = VT.getSizeInBits() / SplitSize;
-    unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
-    EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
-    EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
-
-    unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode);
-    SmallVector<SDValue, 8> Opnds;
-    for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
-      SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
-                                   DAG.getIntPtrConstant(Offset, DL));
-      SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
-      SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec);
-      Opnds.push_back(SrcVec);
-    }
-    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
-  };
-
-  // On pre-AVX targets, split into 128-bit nodes of
-  // ISD::*_EXTEND_VECTOR_INREG.
-  if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128))
-    return SplitAndExtendInReg(128);
-
-  // On pre-AVX512 targets, split into 256-bit nodes of
-  // ISD::*_EXTEND_VECTOR_INREG.
-  if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256))
-    return SplitAndExtendInReg(256);
-
-  return SDValue();
-}
-
 // Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm
 // result type.
 static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
@@ -42984,9 +41950,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
   }

-  if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
-    return V;
-
   if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
     return V;

@@ -43122,9 +42085,6 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
     return V;

-  if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
-    return V;
-
   if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
     return V;

@@ -43301,8 +42261,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
   // go through type promotion to a 128-bit vector.
   if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
       VT.getVectorElementType() == MVT::i1 &&
-      (ExperimentalVectorWideningLegalization ||
-       VT.getVectorNumElements() > 4) &&
       (OpVT.getVectorElementType() == MVT::i8 ||
        OpVT.getVectorElementType() == MVT::i16)) {
     SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS,
@@ -44215,11 +43173,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
   // result to v2i32 which will be removed by type legalization. If we widen
   // narrow vectors then we bitcast to v4i32 and extract v2i32.
   MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
-  if (ExperimentalVectorWideningLegalization ||
-      VT.getSizeInBits() >= ResVT.getSizeInBits())
-    Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
-  else
-    Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
+  Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);

   if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
     // Fill the upper elements with zero to match the add width.
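[Editor's note: the combineMaskedStore hunk above replaces the old shuffle-based
widening of truncating masked stores with a demanded-bits simplification. X86
masked-store instructions test only the sign bit of each mask lane, so every
lower bit is a don't-care; the combine therefore asks SimplifyDemandedBits for
just the MSB of each lane. A minimal standalone sketch of how that
demanded-bits constant is formed, assuming only LLVM's APInt from libSupport
(all SelectionDAG plumbing elided):

    // Sketch, not the committed code: the per-lane demanded-bits value
    // handed to SimplifyDemandedBits for a mask legalized to, e.g., v4i32.
    // Only bit 31 of each lane is ever read by the masked store.
    #include "llvm/ADT/APInt.h"
    #include <cstdio>

    int main() {
      unsigned EltBits = 32; // scalar width of the legalized mask lanes
      llvm::APInt DemandedMask = llvm::APInt::getSignMask(EltBits);
      std::printf("demanded bits per lane: 0x%08llx\n",
                  (unsigned long long)DemandedMask.getZExtValue());
      return 0;
    }

Any computation feeding the mask that only affects the low 31 bits of a lane
can then be stripped, which is what makes the old shuffle machinery
unnecessary.]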
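[Editor's note: the combineLoopSADPattern hunks here and just below rely on a
property of psadbw that justifies the now-unconditional BITCAST: each 64-bit
result lane is the sum of eight absolute byte differences, bounded by
8 * 255 = 2040, so the upper 48 bits of every lane are zero and reinterpreting
the lanes as i32 loses nothing. A scalar model of one lane, for illustration
only:

    // What psadbw computes per 64-bit lane (illustrative, not LLVM code).
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    uint64_t psadbw_lane(const uint8_t a[8], const uint8_t b[8]) {
      uint64_t sum = 0;
      for (int i = 0; i < 8; ++i)
        sum += (uint64_t)std::abs((int)a[i] - (int)b[i]);
      return sum; // at most 2040, so bits 16..63 are always zero
    }

    int main() {
      uint8_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      uint8_t b[8] = {8, 7, 6, 5, 4, 3, 2, 1};
      std::printf("sad = %llu\n", (unsigned long long)psadbw_lane(a, b));
      return 0;
    }
]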
@@ -44228,8 +43182,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
     SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, DL, ResVT));
     Ops[0] = Sad;
     Sad = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
-  } else if (ExperimentalVectorWideningLegalization &&
-             VT.getSizeInBits() < ResVT.getSizeInBits()) {
+  } else if (VT.getSizeInBits() < ResVT.getSizeInBits()) {
     Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad,
                       DAG.getIntPtrConstant(0, DL));
   }
@@ -45246,15 +44199,6 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
     }
   }

-  // Combine (ext_invec (ext_invec X)) -> (ext_invec X)
-  // Disabling for widening legalization for now. We can enable if we find a
-  // case that needs it. Otherwise it can be deleted when we switch to
-  // widening legalization.
-  if (!ExperimentalVectorWideningLegalization &&
-      In.getOpcode() == N->getOpcode() &&
-      TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType()))
-    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
-
   // Attempt to combine as a shuffle.
   // TODO: SSE41 support
   if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ebbf6d0702e8..83c78451253b 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -50,8 +50,6 @@ using namespace llvm;

 #define DEBUG_TYPE "x86tti"

-extern cl::opt<bool> ExperimentalVectorWideningLegalization;
-
 //===----------------------------------------------------------------------===//
 //
 // X86 cost model.
@@ -920,8 +918,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
       // FIXME: We can use permq for 64-bit or larger extracts from 256-bit
       // vectors.
       int OrigSubElts = SubTp->getVectorNumElements();
-      if (ExperimentalVectorWideningLegalization &&
-          NumSubElts > OrigSubElts &&
+      if (NumSubElts > OrigSubElts &&
          (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
          LT.second.getVectorElementType() ==
            SubLT.second.getVectorElementType() &&
@@ -1333,12 +1330,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
   // 256-bit wide vectors.
-  // Used with widening legalization
-  static const TypeConversionCostTblEntry AVX512FConversionTblWide[] = {
-    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
-    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
-  };
-
   static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
     { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
     { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
@@ -1356,6 +1347,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
+    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
     { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
     { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
@@ -1410,28 +1403,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
   };

-  static const TypeConversionCostTblEntry AVX2ConversionTblWide[] = {
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
-  };
-
   static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
+    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
+    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
+    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
@@ -1450,24 +1434,18 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
   };

-  static const TypeConversionCostTblEntry AVXConversionTblWide[] = {
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
-  };
-
   static const TypeConversionCostTblEntry AVXConversionTbl[] = {
     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
+    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
     { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
     { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
@@ -1574,11 +1552,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
   };

-  static const TypeConversionCostTblEntry SSE2ConversionTblWide[] = {
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
-  };
-
   static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
     // These are somewhat magic numbers justified by looking at the output of
     // Intel's IACA, running some kernels and making sure when we take
@@ -1588,7 +1561,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },

@@ -1652,13 +1626,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
   std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);

-  if (ST->hasSSE2() && !ST->hasAVX() &&
-      ExperimentalVectorWideningLegalization) {
-    if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTblWide, ISD,
-                                                   LTDest.second, LTSrc.second))
-      return LTSrc.first * Entry->Cost;
-  }
-
   if (ST->hasSSE2() && !ST->hasAVX()) {
     if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
                                                    LTDest.second, LTSrc.second))
@@ -1690,35 +1657,18 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                                SimpleDstTy, SimpleSrcTy))
       return Entry->Cost;

-    if (ST->hasAVX512() && ExperimentalVectorWideningLegalization)
-      if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTblWide, ISD,
-                                                     SimpleDstTy, SimpleSrcTy))
-        return Entry->Cost;
-
     if (ST->hasAVX512())
       if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
                                                      SimpleDstTy, SimpleSrcTy))
        return Entry->Cost;
   }

-  if (ST->hasAVX2() && ExperimentalVectorWideningLegalization) {
-    if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTblWide, ISD,
-                                                   SimpleDstTy, SimpleSrcTy))
-      return Entry->Cost;
-  }
-
   if (ST->hasAVX2()) {
     if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
                                                    SimpleDstTy, SimpleSrcTy))
       return Entry->Cost;
   }

-  if (ST->hasAVX() && ExperimentalVectorWideningLegalization) {
-    if (const auto *Entry = ConvertCostTableLookup(AVXConversionTblWide, ISD,
-                                                   SimpleDstTy, SimpleSrcTy))
-      return Entry->Cost;
-  }
-
   if (ST->hasAVX()) {
     if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
                                                    SimpleDstTy, SimpleSrcTy))
@@ -1731,12 +1681,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       return Entry->Cost;
   }

-  if (ST->hasSSE2() && ExperimentalVectorWideningLegalization) {
-    if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTblWide, ISD,
-                                                   SimpleDstTy, SimpleSrcTy))
-      return Entry->Cost;
-  }
-
   if (ST->hasSSE2()) {
     if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
                                                    SimpleDstTy, SimpleSrcTy))
@@ -2600,7 +2544,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
   // in the table.
   // FIXME: Is there a better way to do this?
   EVT VT = TLI->getValueType(DL, ValTy);
-  if (VT.isSimple() && ExperimentalVectorWideningLegalization) {
+  if (VT.isSimple()) {
     MVT MTy = VT.getSimpleVT();
     if (IsPairwise) {
       if (ST->hasAVX())
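[Editor's note: the X86TargetTransformInfo side of the patch folds every
*TblWide table into the corresponding base table, so each feature level now
performs a single lookup instead of two. The lookup itself is a linear scan
keyed on (opcode, destination type, source type). A self-contained sketch of
that pattern follows; the names are illustrative and this is not LLVM's actual
ConvertCostTableLookup implementation:

    #include <cstdio>
    #include <optional>

    struct CostTblEntry {
      int ISD;    // opcode key, e.g. a SIGN_EXTEND tag
      int DstTy;
      int SrcTy;
      int Cost;
    };

    constexpr int SIGN_EXTEND = 0, V4I64 = 1, V4I16 = 2;

    // Mirrors the merged AVX2 table above: v4i16 -> v4i64 sext costs 1.
    static const CostTblEntry AVX2Tbl[] = {
        {SIGN_EXTEND, V4I64, V4I16, 1},
    };

    std::optional<int> lookupCost(int ISD, int Dst, int Src) {
      for (const auto &E : AVX2Tbl)
        if (E.ISD == ISD && E.DstTy == Dst && E.SrcTy == Src)
          return E.Cost;
      return std::nullopt; // caller falls through to a more generic table
    }

    int main() {
      if (auto C = lookupCost(SIGN_EXTEND, V4I64, V4I16))
        std::printf("cost = %d\n", *C);
      return 0;
    }
]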