diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index b039cdd01d47..264b7cd800ca 100644 --- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -368,10 +368,8 @@ static void scalarizeMaskedGather(CallInst *CI) { Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - Value *UndefVal = UndefValue::get(VecType); - // The result vector - Value *VResult = UndefVal; + Value *VResult = Src0; unsigned VectorWidth = VecType->getNumElements(); // Shorten the way if the mask is a vector of constants. @@ -386,28 +384,17 @@ static void scalarizeMaskedGather(CallInst *CI) { VResult = Builder.CreateInsertElement( VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx)); } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); + CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); return; } - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %Mask1 = extractelement <16 x i1> %Mask, i32 1 // br i1 %Mask1, label %cond.load, label %else // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), "Mask" + Twine(Idx)); @@ -425,8 +412,9 @@ static void scalarizeMaskedGather(CallInst *CI) { "Ptr" + Twine(Idx)); LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), - "Res" + Twine(Idx)); + Value *NewVResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx), + "Res" + Twine(Idx)); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); @@ -436,13 +424,14 @@ static void scalarizeMaskedGather(CallInst *CI) { OldBr->eraseFromParent(); PrevIfBlock = IfBlock; IfBlock = NewIfBlock; + + PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(NewVResult, CondBlock); + Phi->addIncoming(VResult, PrevIfBlock); + VResult = Phi; } - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); + CI->replaceAllUsesWith(VResult); CI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/X86/avx2-masked-gather.ll b/llvm/test/CodeGen/X86/avx2-masked-gather.ll index 0058c8b619ea..3a831a88aa67 100644 --- a/llvm/test/CodeGen/X86/avx2-masked-gather.ll +++ b/llvm/test/CodeGen/X86/avx2-masked-gather.ll @@ -30,25 +30,24 @@ define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i3 ; ; NOGATHER-LABEL: masked_gather_v2i32: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $xmm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB0_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: movl (%rax), %eax +; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1 ; NOGATHER-NEXT: .LBB0_2: # %else ; NOGATHER-NEXT: 
vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB0_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax ; NOGATHER-NEXT: movl (%rax), %eax -; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2 +; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1 ; NOGATHER-NEXT: .LBB0_4: # %else2 -; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0 -; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0 ; NOGATHER-NEXT: retq entry: %ld = load <2 x i32*>, <2 x i32*>* %ptr @@ -80,26 +79,24 @@ define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, ; ; NOGATHER-LABEL: masked_gather_v2i32_concat: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $xmm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB1_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: movl (%rax), %eax +; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1 ; NOGATHER-NEXT: .LBB1_2: # %else ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB1_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax ; NOGATHER-NEXT: movl (%rax), %eax -; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2 +; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1 ; NOGATHER-NEXT: .LBB1_4: # %else2 -; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0 -; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 -; NOGATHER-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] ; NOGATHER-NEXT: retq entry: %ld = load <2 x i32*>, <2 x i32*>* %ptr @@ -132,25 +129,23 @@ define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, < ; ; NOGATHER-LABEL: masked_gather_v2float: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $xmm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB2_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3] ; NOGATHER-NEXT: .LBB2_2: # %else ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB2_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; NOGATHER-NEXT: .LBB2_4: # %else2 -; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; NOGATHER-NEXT: vmovaps %xmm1, %xmm0 ; NOGATHER-NEXT: retq entry: %ld = load <2 x float*>, <2 x float*>* %ptr @@ -180,25 +175,23 @@ define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %m ; ; NOGATHER-LABEL: masked_gather_v2float_concat: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), 
%xmm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $xmm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB3_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3] ; NOGATHER-NEXT: .LBB3_2: # %else ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB3_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; NOGATHER-NEXT: .LBB3_4: # %else2 -; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; NOGATHER-NEXT: vmovaps %xmm1, %xmm0 ; NOGATHER-NEXT: retq entry: %ld = load <2 x float*>, <2 x float*>* %ptr @@ -229,27 +222,26 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3 ; NOGATHER-LABEL: masked_gather_v4i32: ; NOGATHER: # %bb.0: # %entry ; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax -; NOGATHER-NEXT: # implicit-def: $xmm3 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB4_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load ; NOGATHER-NEXT: vmovq %xmm0, %rax -; NOGATHER-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2 ; NOGATHER-NEXT: .LBB4_2: # %else ; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB4_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax -; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2 ; NOGATHER-NEXT: .LBB4_4: # %else2 ; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB4_6 ; NOGATHER-NEXT: # %bb.5: # %cond.load4 -; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4 -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3 +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2 ; NOGATHER-NEXT: .LBB4_6: # %else5 ; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax ; NOGATHER-NEXT: testb $1, %al @@ -257,10 +249,9 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3 ; NOGATHER-NEXT: # %bb.7: # %cond.load7 ; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax -; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm2 ; NOGATHER-NEXT: .LBB4_8: # %else8 -; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0 -; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0 +; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0 ; NOGATHER-NEXT: vzeroupper ; NOGATHER-NEXT: retq entry: @@ -289,27 +280,27 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, < ; NOGATHER-LABEL: masked_gather_v4float: ; NOGATHER: # %bb.0: # %entry ; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax -; NOGATHER-NEXT: # implicit-def: $xmm3 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB5_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load ; NOGATHER-NEXT: vmovq %xmm0, %rax ; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = 
mem[0],zero,zero,zero +; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3] ; NOGATHER-NEXT: .LBB5_2: # %else ; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB5_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3] +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] ; NOGATHER-NEXT: .LBB5_4: # %else2 ; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB5_6 ; NOGATHER-NEXT: # %bb.5: # %cond.load4 -; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4 -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3] +; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3 +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] ; NOGATHER-NEXT: .LBB5_6: # %else5 ; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax ; NOGATHER-NEXT: testb $1, %al @@ -317,10 +308,9 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, < ; NOGATHER-NEXT: # %bb.7: # %cond.load7 ; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0 ; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0] +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0] ; NOGATHER-NEXT: .LBB5_8: # %else8 -; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0 -; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0 +; NOGATHER-NEXT: vmovaps %xmm2, %xmm0 ; NOGATHER-NEXT: vzeroupper ; NOGATHER-NEXT: retq entry: @@ -357,86 +347,81 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3 ; ; NOGATHER-LABEL: masked_gather_v8i32: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4 -; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3 +; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $ymm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm1, %xmm4 +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB6_2: # %else ; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax -; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm5 -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax +; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm4 +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB6_4: # %else2 ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_6 ; NOGATHER-NEXT: # %bb.5: # %cond.load4 -; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5 -; NOGATHER-NEXT: vmovq %xmm5, %rax -; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm5 -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4 +; NOGATHER-NEXT: vmovq %xmm4, %rax +; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm4 +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB6_6: # %else5 ; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax ; 
NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_8 ; NOGATHER-NEXT: # %bb.7: # %cond.load7 -; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4 -; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax -; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm4 -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3 +; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax +; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm1, %xmm3 +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB6_8: # %else8 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_10 ; NOGATHER-NEXT: # %bb.9: # %cond.load10 -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm4, %xmm4 -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB6_10: # %else11 ; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_12 ; NOGATHER-NEXT: # %bb.11: # %cond.load13 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm4, %xmm4 -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB6_12: # %else14 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_14 ; NOGATHER-NEXT: # %bb.13: # %cond.load16 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4 -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm4, %xmm4 -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB6_14: # %else17 ; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB6_16 ; NOGATHER-NEXT: # %bb.15: # %cond.load19 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 -; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3 -; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0 +; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0 +; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm0, %xmm0 +; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB6_16: # %else20 -; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3 -; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 -; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 +; NOGATHER-NEXT: vmovaps %ymm1, %ymm0 ; NOGATHER-NEXT: retq entry: %ld = load <8 x i32*>, <8 x i32*>* %ptr @@ -473,87 +458,82 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* 
%ptr, <8 x i1> %masks, < ; ; NOGATHER-LABEL: masked_gather_v8float: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4 -; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3 +; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $ymm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7] ; NOGATHER-NEXT: .LBB7_2: # %else ; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3] -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3] +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB7_4: # %else2 ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_6 ; NOGATHER-NEXT: # %bb.5: # %cond.load4 -; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5 -; NOGATHER-NEXT: vmovq %xmm5, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3] -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4 +; NOGATHER-NEXT: vmovq %xmm4, %rax +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3] +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB7_6: # %else5 ; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_8 ; NOGATHER-NEXT: # %bb.7: # %cond.load7 -; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4 -; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0] -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3 +; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0] +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB7_8: # %else8 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_10 ; NOGATHER-NEXT: # %bb.9: # %cond.load10 -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm5 -; NOGATHER-NEXT: vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3] -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4 +; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3] +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB7_10: # %else11 ; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_12 ; NOGATHER-NEXT: # %bb.11: # %cond.load13 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3] -; 
NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3] +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB7_12: # %else14 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_14 ; NOGATHER-NEXT: # %bb.13: # %cond.load16 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4 -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3] -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3] +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB7_14: # %else17 ; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB7_16 ; NOGATHER-NEXT: # %bb.15: # %cond.load19 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 -; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0] -; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0 +; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0 +; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB7_16: # %else20 -; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3 -; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 -; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 +; NOGATHER-NEXT: vmovaps %ymm1, %ymm0 ; NOGATHER-NEXT: retq entry: %ld = load <8 x float*>, <8 x float*>* %ptr @@ -585,50 +565,44 @@ define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i6 ; ; NOGATHER-LABEL: masked_gather_v4i64: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $ymm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB8_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm3 +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB8_2: # %else ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB8_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm4 -; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm3 +; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] ; NOGATHER-NEXT: .LBB8_4: # %else2 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB8_6 ; NOGATHER-NEXT: # %bb.5: # %cond.load4 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, 
%xmm4 -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm4, %xmm4 -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm3, %xmm3 +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB8_6: # %else5 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB8_8 ; NOGATHER-NEXT: # %bb.7: # %cond.load7 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 -; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm3, %xmm3 -; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0 +; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0 +; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm0, %xmm0 +; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB8_8: # %else8 -; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3 -; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0 -; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 -; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; NOGATHER-NEXT: vmovaps %ymm1, %ymm0 ; NOGATHER-NEXT: retq entry: %ld = load <4 x i64*>, <4 x i64*>* %ptr @@ -660,50 +634,44 @@ define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks ; ; NOGATHER-LABEL: masked_gather_v4double: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $ymm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB9_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0],ymm1[1,2,3] ; NOGATHER-NEXT: .LBB9_2: # %else ; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB9_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0] -; NOGATHER-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3] +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm1[0],mem[0] +; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3] ; NOGATHER-NEXT: .LBB9_4: # %else2 ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB9_6 ; NOGATHER-NEXT: # %bb.5: # %cond.load4 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4 -; NOGATHER-NEXT: vmovq %xmm4, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4 -; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1] -; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 +; NOGATHER-NEXT: vmovq %xmm3, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3 +; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm3 = mem[0],xmm3[1] +; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB9_6: # %else5 ; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB9_8 ; 
NOGATHER-NEXT: # %bb.7: # %cond.load7 -; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3 -; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0] -; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0 +; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax +; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0 +; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 ; NOGATHER-NEXT: .LBB9_8: # %else8 -; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0 -; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3 -; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0 -; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 -; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; NOGATHER-NEXT: vmovapd %ymm1, %ymm0 ; NOGATHER-NEXT: retq entry: %ld = load <4 x double*>, <4 x double*>* %ptr @@ -733,24 +701,22 @@ define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i6 ; ; NOGATHER-LABEL: masked_gather_v2i64: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $xmm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB10_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1 ; NOGATHER-NEXT: .LBB10_2: # %else ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB10_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm2 +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm1 ; NOGATHER-NEXT: .LBB10_4: # %else2 -; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0 -; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0 ; NOGATHER-NEXT: retq entry: %ld = load <2 x i64*>, <2 x i64*>* %ptr @@ -780,24 +746,22 @@ define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks ; ; NOGATHER-LABEL: masked_gather_v2double: ; NOGATHER: # %bb.0: # %entry -; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3 +; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2 ; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax -; NOGATHER-NEXT: # implicit-def: $xmm2 ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB11_2 ; NOGATHER-NEXT: # %bb.1: # %cond.load -; NOGATHER-NEXT: vmovq %xmm3, %rax -; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; NOGATHER-NEXT: vmovq %xmm2, %rax +; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] ; NOGATHER-NEXT: .LBB11_2: # %else ; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax ; NOGATHER-NEXT: testb $1, %al ; NOGATHER-NEXT: je .LBB11_4 ; NOGATHER-NEXT: # %bb.3: # %cond.load1 -; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax -; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0] +; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax +; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; NOGATHER-NEXT: .LBB11_4: # %else2 -; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0 -; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NOGATHER-NEXT: vmovapd %xmm1, %xmm0 ; NOGATHER-NEXT: retq entry: %ld = load <2 x double*>, <2 x double*>* %ptr diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll 
b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index e8f0321544da..0aef996e54e1 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -1658,38 +1658,35 @@ declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, < define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; KNL_64-LABEL: test30: ; KNL_64: # %bb.0: -; KNL_64-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 ; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_64-NEXT: kmovw %k1, %eax +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kmovw %k0, %eax ; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 ; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1 -; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; KNL_64-NEXT: testb $1, %al -; KNL_64-NEXT: # implicit-def: $xmm0 ; KNL_64-NEXT: je .LBB31_2 ; KNL_64-NEXT: # %bb.1: # %cond.load -; KNL_64-NEXT: vmovq %xmm1, %rax -; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_64-NEXT: vmovq %xmm0, %rax +; KNL_64-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3 ; KNL_64-NEXT: .LBB31_2: # %else -; KNL_64-NEXT: kshiftrw $1, %k1, %k0 -; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: kshiftrw $1, %k0, %k1 +; KNL_64-NEXT: kmovw %k1, %eax ; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_4 ; KNL_64-NEXT: # %bb.3: # %cond.load1 -; KNL_64-NEXT: vpextrq $1, %xmm1, %rax -; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0 +; KNL_64-NEXT: vpextrq $1, %xmm0, %rax +; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3 ; KNL_64-NEXT: .LBB31_4: # %else2 -; KNL_64-NEXT: kshiftrw $2, %k1, %k0 +; KNL_64-NEXT: kshiftrw $2, %k0, %k0 ; KNL_64-NEXT: kmovw %k0, %eax ; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_6 ; KNL_64-NEXT: # %bb.5: # %cond.load4 -; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1 -; KNL_64-NEXT: vmovq %xmm1, %rax -; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0 +; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL_64-NEXT: vmovq %xmm0, %rax +; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3 ; KNL_64-NEXT: .LBB31_6: # %else5 -; KNL_64-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1} ; KNL_64-NEXT: vmovdqa %xmm3, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1698,37 +1695,35 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; KNL_32: # %bb.0: ; KNL_32-NEXT: subl $12, %esp ; KNL_32-NEXT: .cfi_def_cfa_offset 16 -; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 -; KNL_32-NEXT: kmovw %k1, %eax +; KNL_32-NEXT: vmovdqa %xmm0, %xmm3 +; KNL_32-NEXT: vpslld $31, %xmm2, %xmm0 +; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL_32-NEXT: kmovw %k0, %eax +; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0 ; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1 -; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; KNL_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1 ; KNL_32-NEXT: testb $1, %al -; KNL_32-NEXT: # implicit-def: $xmm1 ; KNL_32-NEXT: je .LBB31_2 ; KNL_32-NEXT: # %bb.1: # %cond.load -; KNL_32-NEXT: vmovd %xmm2, %eax -; KNL_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_32-NEXT: vmovd %xmm1, %eax +; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0 ; KNL_32-NEXT: .LBB31_2: # %else -; KNL_32-NEXT: kshiftrw $1, %k1, %k0 -; KNL_32-NEXT: kmovw %k0, %eax +; KNL_32-NEXT: kshiftrw $1, %k0, %k1 +; KNL_32-NEXT: kmovw %k1, %eax ; KNL_32-NEXT: testb $1, %al ; KNL_32-NEXT: je .LBB31_4 ; KNL_32-NEXT: # %bb.3: # %cond.load1 -; KNL_32-NEXT: vpextrd $1, %xmm2, %eax -; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1 +; 
KNL_32-NEXT: vpextrd $1, %xmm1, %eax +; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0 ; KNL_32-NEXT: .LBB31_4: # %else2 -; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0 -; KNL_32-NEXT: kshiftrw $2, %k1, %k0 +; KNL_32-NEXT: kshiftrw $2, %k0, %k0 ; KNL_32-NEXT: kmovw %k0, %eax ; KNL_32-NEXT: testb $1, %al ; KNL_32-NEXT: je .LBB31_6 ; KNL_32-NEXT: # %bb.5: # %cond.load4 -; KNL_32-NEXT: vpextrd $2, %xmm2, %eax -; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1 +; KNL_32-NEXT: vpextrd $2, %xmm1, %eax +; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0 ; KNL_32-NEXT: .LBB31_6: # %else5 -; KNL_32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; KNL_32-NEXT: addl $12, %esp ; KNL_32-NEXT: .cfi_def_cfa_offset 4 ; KNL_32-NEXT: vzeroupper @@ -1737,36 +1732,34 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; SKX-LABEL: test30: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX-NEXT: vpmovd2m %xmm2, %k1 -; SKX-NEXT: kmovw %k1, %eax +; SKX-NEXT: vpmovd2m %xmm2, %k0 +; SKX-NEXT: kmovw %k0, %eax ; SKX-NEXT: vpmovsxdq %xmm1, %ymm1 ; SKX-NEXT: vpsllq $2, %ymm1, %ymm1 -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; SKX-NEXT: testb $1, %al -; SKX-NEXT: # implicit-def: $xmm0 ; SKX-NEXT: je .LBB31_2 ; SKX-NEXT: # %bb.1: # %cond.load -; SKX-NEXT: vmovq %xmm1, %rax -; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: vmovq %xmm0, %rax +; SKX-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3 ; SKX-NEXT: .LBB31_2: # %else -; SKX-NEXT: kshiftrw $1, %k1, %k0 -; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: kshiftrw $1, %k0, %k1 +; SKX-NEXT: kmovw %k1, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: je .LBB31_4 ; SKX-NEXT: # %bb.3: # %cond.load1 -; SKX-NEXT: vpextrq $1, %xmm1, %rax -; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0 +; SKX-NEXT: vpextrq $1, %xmm0, %rax +; SKX-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3 ; SKX-NEXT: .LBB31_4: # %else2 -; SKX-NEXT: kshiftrw $2, %k1, %k0 +; SKX-NEXT: kshiftrw $2, %k0, %k0 ; SKX-NEXT: kmovw %k0, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: je .LBB31_6 ; SKX-NEXT: # %bb.5: # %cond.load4 -; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1 -; SKX-NEXT: vmovq %xmm1, %rax -; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0 +; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 +; SKX-NEXT: vmovq %xmm0, %rax +; SKX-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3 ; SKX-NEXT: .LBB31_6: # %else5 -; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1} ; SKX-NEXT: vmovdqa %xmm3, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -1775,36 +1768,35 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; SKX_32: # %bb.0: ; SKX_32-NEXT: subl $12, %esp ; SKX_32-NEXT: .cfi_def_cfa_offset 16 -; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX_32-NEXT: vpmovd2m %xmm2, %k1 -; SKX_32-NEXT: kmovw %k1, %eax +; SKX_32-NEXT: vmovdqa %xmm0, %xmm3 +; SKX_32-NEXT: vpslld $31, %xmm2, %xmm0 +; SKX_32-NEXT: vpmovd2m %xmm0, %k0 +; SKX_32-NEXT: kmovw %k0, %eax +; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0 ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 -; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; SKX_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1 ; SKX_32-NEXT: testb $1, %al -; SKX_32-NEXT: # implicit-def: $xmm1 ; SKX_32-NEXT: je .LBB31_2 ; SKX_32-NEXT: # %bb.1: # %cond.load -; SKX_32-NEXT: vmovd %xmm2, %eax -; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SKX_32-NEXT: vmovd %xmm1, %eax +; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0 ; SKX_32-NEXT: .LBB31_2: # %else -; SKX_32-NEXT: kshiftrw $1, %k1, %k0 -; SKX_32-NEXT: kmovw %k0, %eax 
+; SKX_32-NEXT: kshiftrw $1, %k0, %k1 +; SKX_32-NEXT: kmovw %k1, %eax ; SKX_32-NEXT: testb $1, %al ; SKX_32-NEXT: je .LBB31_4 ; SKX_32-NEXT: # %bb.3: # %cond.load1 -; SKX_32-NEXT: vpextrd $1, %xmm2, %eax -; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1 +; SKX_32-NEXT: vpextrd $1, %xmm1, %eax +; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0 ; SKX_32-NEXT: .LBB31_4: # %else2 -; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0 -; SKX_32-NEXT: kshiftrw $2, %k1, %k0 +; SKX_32-NEXT: kshiftrw $2, %k0, %k0 ; SKX_32-NEXT: kmovw %k0, %eax ; SKX_32-NEXT: testb $1, %al ; SKX_32-NEXT: je .LBB31_6 ; SKX_32-NEXT: # %bb.5: # %cond.load4 -; SKX_32-NEXT: vpextrd $2, %xmm2, %eax -; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1 +; SKX_32-NEXT: vpextrd $2, %xmm1, %eax +; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0 ; SKX_32-NEXT: .LBB31_6: # %else5 -; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} ; SKX_32-NEXT: addl $12, %esp ; SKX_32-NEXT: .cfi_def_cfa_offset 4 ; SKX_32-NEXT: retl diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll index 53d5243615f9..40c0fc4ef6df 100644 --- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll +++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll @@ -8,10 +8,10 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass ; CHECK: cond.load: ; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0 ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8 -; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0 +; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0 ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: -; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ] +; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ] ; CHECK-NEXT: [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i32 1 ; CHECK-NEXT: br i1 [[MASK1]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]] ; CHECK: cond.load1: @@ -20,9 +20,8 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass ; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[LOAD1]], i32 1 ; CHECK-NEXT: br label [[ELSE2]] ; CHECK: else2: -; CHECK-NEXT: [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ] +; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]] ; %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru) ret <2 x i64> %ret @@ -32,12 +31,11 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru) ; CHECK-LABEL: @scalarize_v2i64_ones_mask( ; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0 ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8 -; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0 +; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0 ; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i32 1 ; CHECK-NEXT: [[LOAD1:%.*]] = load 
i64, i64* [[PTR1]], align 8
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 true, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
 %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
 ret <2 x i64> %ret
@@ -45,8 +43,7 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru)
define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64*> %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
 %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
 ret <2 x i64> %ret
@@ -56,9 +53,8 @@ define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64*> %p, <2 x i64> %passthru)
; CHECK-LABEL: @scalarize_v2i64_const_mask(
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
-; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD1]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i32 1
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
 %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
 ret <2 x i64> %ret
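
Note on the change: the pass now seeds the scalarized gather expansion with the passthru operand (Src0) instead of undef, so each conditional load inserts into an already fully-defined vector. That removes both the per-iteration "previous phi" bookkeeping and the final select against Src0, and the codegen diffs above show the knock-on effect: the "# implicit-def" temporaries and the trailing vblendv / masked vmovdqa32 merges disappear. Below is a minimal sketch of the IR the pass now emits for a <2 x i64> gather with a variable mask, reconstructed from the CHECK lines in expand-masked-gather.ll; the function name is illustrative, the value names follow the pass's "Mask"/"Ptr"/"Load"/"Res"/"res.phi.else" naming scheme, and it is written against the typed-pointer IR these tests use:

define <2 x i64> @expanded_gather(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %passthru) {
entry:
  ; lane 0: test the mask bit, conditionally load and insert
  %Mask0 = extractelement <2 x i1> %mask, i32 0
  br i1 %Mask0, label %cond.load, label %else

cond.load:
  %Ptr0 = extractelement <2 x i64*> %p, i32 0
  %Load0 = load i64, i64* %Ptr0, align 8
  ; insert into %passthru, not undef, so unloaded lanes are already correct
  %Res0 = insertelement <2 x i64> %passthru, i64 %Load0, i32 0
  br label %else

else:
  %res.phi.else = phi <2 x i64> [ %Res0, %cond.load ], [ %passthru, %entry ]
  ; lane 1
  %Mask1 = extractelement <2 x i1> %mask, i32 1
  br i1 %Mask1, label %cond.load1, label %else2

cond.load1:
  %Ptr1 = extractelement <2 x i64*> %p, i32 1
  %Load1 = load i64, i64* %Ptr1, align 8
  %Res1 = insertelement <2 x i64> %res.phi.else, i64 %Load1, i32 1
  br label %else2

else2:
  ; one phi per element is all that remains; no final select against %passthru
  %res.phi.else3 = phi <2 x i64> [ %Res1, %cond.load1 ], [ %res.phi.else, %else ]
  ret <2 x i64> %res.phi.else3
}

For constant masks the "Shorten the way" path in scalarizeMaskedGather skips the control flow entirely, which is why the _ones_mask, _zero_mask, and _const_mask tests above reduce to straight-line inserts into the passthru vector (or just returning it).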