diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2cb6ec376c43..58ea9e0ffd92 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35656,10 +35656,23 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   MVT SubVecVT = SubVec.getSimpleValueType();
 
-  // Inserting zeros into zeros is a nop.
-  if (ISD::isBuildVectorAllZeros(Vec.getNode()) &&
-      ISD::isBuildVectorAllZeros(SubVec.getNode()))
-    return Vec;
+  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
+    // Inserting zeros into zeros is a nop.
+    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+      return Vec;
+
+    // If we're inserting into a zero vector and then into a larger zero vector,
+    // just insert into the larger zero vector directly.
+    if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
+      // Take the inner insert's index from SubVec (operand 2), not from Idx;
+      // adding the outer index rebases it onto the larger vector.
+      unsigned Idx2Val = SubVec.getConstantOperandVal(2);
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec,
+                         SubVec.getOperand(1),
+                         DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
+    }
+  }
 
   // If this is an insert of an extract, combine to a shuffle. Don't do this
   // if the insert or extract can be represented with a subregister operation.
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index e5487ae2b101..d9a0c4e02953 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -1134,13 +1134,11 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind { ; X32-LABEL: test_mm512_zextpd128_pd512: ; X32: # BB#0: ; X32-NEXT: vmovaps %xmm0, %xmm0 -; X32-NEXT: vmovaps %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextpd128_pd512: ; X64: # BB#0: ; X64-NEXT: vmovaps %xmm0, %xmm0 -; X64-NEXT: vmovaps %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> ret <8 x double> %res @@ -1196,13 +1194,11 @@ define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm512_zextsi128_si512: ; X32: # BB#0: ; X32-NEXT: vmovaps %xmm0, %xmm0 -; X32-NEXT: vmovaps %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextsi128_si512: ; X64: # BB#0: ; X64-NEXT: vmovaps %xmm0, %xmm0 -; X64-NEXT: vmovaps %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> ret <8 x i64> %res diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll index 7049a72518a9..716f7767935e 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -107,14 +107,12 @@ define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noin ; ALL-LABEL: merge_8f64_f64_12zzuuzz: ; ALL: # BB#0: ; ALL-NEXT: vmovaps 8(%rdi), %xmm0 -; ALL-NEXT: vmovaps %ymm0, %ymm0 ; ALL-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0 -; X32-AVX512F-NEXT: vmovaps %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl %ptr0 = 
getelementptr inbounds double, double* %ptr, i64 1 %ptr1 = getelementptr inbounds double, double* %ptr, i64 2