diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4c3fd0d75e78..cef768e95dda 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8621,10 +8621,12 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, bool ConstSplatVal = (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); + bool IsLoad = ISD::isNormalLoad(Ld.getNode()); // Make sure that all of the users of a non-constant load are from the // BUILD_VECTOR node. - if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode())) + // FIXME: Is the use count needed for non-constant, non-load case? + if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode())) return SDValue(); unsigned ScalarSize = Ld.getValueSizeInBits(); @@ -8674,8 +8676,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, } } - bool IsLoad = ISD::isNormalLoad(Ld.getNode()); - // Handle AVX2 in-register broadcasts. if (!IsLoad && Subtarget.hasInt256() && (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) @@ -8685,6 +8685,10 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, if (!IsLoad) return SDValue(); + // Make sure the non-chain result is only used by this build vector. + if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0)) + return SDValue(); + if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || (Subtarget.hasVLX() && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); diff --git a/llvm/test/CodeGen/X86/pr35443.ll b/llvm/test/CodeGen/X86/pr35443.ll index a42a14bdbcab..01001b021bec 100644 --- a/llvm/test/CodeGen/X86/pr35443.ll +++ b/llvm/test/CodeGen/X86/pr35443.ll @@ -8,8 +8,7 @@ define void @pr35443() { ; CHECK-LABEL: pr35443: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl ac+{{.*}}(%rip), %eax -; CHECK-NEXT: vmovd %eax, %xmm0 +; CHECK-NEXT: vpbroadcastb ac+{{.*}}(%rip), %xmm0 ; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpsubq %ymm0, %ymm1, %ymm0