diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5bdcb89b8f50..1f6533c5c57f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39666,6 +39666,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, "Unsupported vector type for horizontal add/sub"); unsigned NumElts = VT.getVectorNumElements(); + // TODO - can we make a general helper method that does all of this for us? auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1, SmallVectorImpl &ShuffleMask) { if (Op.getOpcode() == ISD::VECTOR_SHUFFLE) { @@ -39677,17 +39678,33 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, ShuffleMask.append(Mask.begin(), Mask.end()); return; } + bool UseSubVector = false; + if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && + Op.getOperand(0).getValueType().is256BitVector() && + llvm::isNullConstant(Op.getOperand(1))) { + Op = Op.getOperand(0); + UseSubVector = true; + } bool IsUnary; SmallVector SrcOps; SmallVector SrcShuffleMask; SDValue BC = peekThroughBitcasts(Op); if (isTargetShuffle(BC.getOpcode()) && getTargetShuffleMask(BC.getNode(), BC.getSimpleValueType(), false, - SrcOps, SrcShuffleMask, IsUnary) && - SrcOps.size() <= 2 && SrcShuffleMask.size() == NumElts) { - N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue(); - N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue(); - ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end()); + SrcOps, SrcShuffleMask, IsUnary)) { + if (!UseSubVector && SrcShuffleMask.size() == NumElts && + SrcOps.size() <= 2) { + N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue(); + N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue(); + ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end()); + } + if (UseSubVector && (SrcShuffleMask.size() == (NumElts * 2)) && + SrcOps.size() == 1) { + N0 = extract128BitVector(SrcOps[0], 0, DAG, SDLoc(Op)); + N1 = extract128BitVector(SrcOps[0], NumElts, DAG, SDLoc(Op)); + ArrayRef Mask = ArrayRef(SrcShuffleMask).slice(0, NumElts); + ShuffleMask.append(Mask.begin(), Mask.end()); + } } }; diff --git a/llvm/test/CodeGen/X86/phaddsub.ll b/llvm/test/CodeGen/X86/phaddsub.ll index cadde6873b19..08cacce6abd8 100644 --- a/llvm/test/CodeGen/X86/phaddsub.ll +++ b/llvm/test/CodeGen/X86/phaddsub.ll @@ -649,11 +649,8 @@ define i32 @PR39936_v8i32(<8 x i32>) { ; ; AVX2-SHUF-LABEL: PR39936_v8i32: ; AVX2-SHUF: # %bb.0: -; AVX2-SHUF-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7] -; AVX2-SHUF-NEXT: vpermd %ymm0, %ymm1, %ymm1 -; AVX2-SHUF-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,5,7,6,7] -; AVX2-SHUF-NEXT: vpermd %ymm0, %ymm2, %ymm0 -; AVX2-SHUF-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX2-SHUF-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-SHUF-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX2-SHUF-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX2-SHUF-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX2-SHUF-NEXT: vpaddd %xmm0, %xmm1, %xmm0