[ScalarizeMaskedMemIntrin] When expanding masked gathers, start with the passthru vector and insert the new load results into it.

Previously we started with undef and did a final select to merge with the passthru at the end. Starting from the passthru makes that select unnecessary, which also removes the final blend instructions from the X86 test output.

llvm-svn: 343273
Craig Topper 2018-09-27 21:28:59 +00:00
parent 45ad631b4c
commit 6911bfe263
4 changed files with 237 additions and 296 deletions
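
Concretely, for a <2 x i64> gather the new expansion threads the passthru through the whole insert/phi chain instead of blending it in at the end. A minimal sketch of the resulting IR, mirroring the updated ScalarizeMaskedMemIntrin test checks below (the function name is illustrative):

define <2 x i64> @expanded_gather(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %passthru) {
  %Mask0 = extractelement <2 x i1> %mask, i32 0
  br i1 %Mask0, label %cond.load, label %else
cond.load:
  %Ptr0 = extractelement <2 x i64*> %p, i32 0
  %Load0 = load i64, i64* %Ptr0, align 8
  ; the insert now starts from the passthru rather than undef
  %Res0 = insertelement <2 x i64> %passthru, i64 %Load0, i32 0
  br label %else
else:
  %res.phi.else = phi <2 x i64> [ %Res0, %cond.load ], [ %passthru, %0 ]
  %Mask1 = extractelement <2 x i1> %mask, i32 1
  br i1 %Mask1, label %cond.load1, label %else2
cond.load1:
  %Ptr1 = extractelement <2 x i64*> %p, i32 1
  %Load1 = load i64, i64* %Ptr1, align 8
  %Res1 = insertelement <2 x i64> %res.phi.else, i64 %Load1, i32 1
  br label %else2
else2:
  ; the last phi is the gather result; the old select against %passthru is gone
  %res.phi.else3 = phi <2 x i64> [ %Res1, %cond.load1 ], [ %res.phi.else, %else ]
  ret <2 x i64> %res.phi.else3
}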


@@ -368,10 +368,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
Value *UndefVal = UndefValue::get(VecType);
// The result vector
Value *VResult = UndefVal;
Value *VResult = Src0;
unsigned VectorWidth = VecType->getNumElements();
// Shorten the way if the mask is a vector of constants.
@@ -386,28 +384,17 @@ static void scalarizeMaskedGather(CallInst *CI) {
VResult = Builder.CreateInsertElement(
VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
}
Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
CI->replaceAllUsesWith(NewI);
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
PHINode *Phi = nullptr;
Value *PrevPhi = UndefVal;
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load, label %else
//
if (Idx > 0) {
Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
Phi->addIncoming(VResult, CondBlock);
Phi->addIncoming(PrevPhi, PrevIfBlock);
PrevPhi = Phi;
VResult = Phi;
}
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
@@ -425,8 +412,9 @@ static void scalarizeMaskedGather(CallInst *CI) {
"Ptr" + Twine(Idx));
LoadInst *Load =
Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
"Res" + Twine(Idx));
Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
Builder.getInt32(Idx),
"Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -436,13 +424,14 @@ static void scalarizeMaskedGather(CallInst *CI) {
OldBr->eraseFromParent();
PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
Phi->addIncoming(NewVResult, CondBlock);
Phi->addIncoming(VResult, PrevIfBlock);
VResult = Phi;
}
Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
Phi->addIncoming(VResult, CondBlock);
Phi->addIncoming(PrevPhi, PrevIfBlock);
Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
CI->replaceAllUsesWith(NewI);
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
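
Stitched together, the per-element loop after this patch reads roughly as follows (a sketch assembled from the hunks above; the block splitting and per-lane pointer computation are elided, as are the declarations they produce):

Value *VResult = Src0; // start from the passthru instead of undef
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
  Value *Predicate = Builder.CreateExtractElement(
      Mask, Builder.getInt32(Idx), "Mask" + Twine(Idx));
  // ... split the block on Predicate; CondBlock holds the load, PrevIfBlock
  // is the block we branched from ...
  LoadInst *Load =
      Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
  Value *NewVResult = Builder.CreateInsertElement(
      VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
  // Merge the freshly loaded lane with the value that arrives when this
  // lane's mask bit is clear; the phi result feeds the next iteration.
  PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
  Phi->addIncoming(NewVResult, CondBlock);
  Phi->addIncoming(VResult, PrevIfBlock);
  VResult = Phi;
}
CI->replaceAllUsesWith(VResult); // no trailing select against Src0
CI->eraseFromParent();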


@@ -30,25 +30,24 @@ define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i3
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB0_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB0_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB0_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB0_4: # %else2
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x i32*>, <2 x i32*>* %ptr
@@ -80,26 +79,24 @@ define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks,
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB1_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB1_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB1_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB1_4: # %else2
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x i32*>, <2 x i32*>* %ptr
@@ -132,25 +129,23 @@ define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB2_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT: .LBB2_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB2_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: .LBB2_4: # %else2
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x float*>, <2 x float*>* %ptr
@@ -180,25 +175,23 @@ define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %m
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB3_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT: .LBB3_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB3_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: .LBB3_4: # %else2
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x float*>, <2 x float*>* %ptr
@@ -229,27 +222,26 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT: # implicit-def: $xmm3
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
@@ -257,10 +249,9 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_8: # %else8
; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
entry:
@@ -289,27 +280,27 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
; NOGATHER-NEXT: # implicit-def: $xmm3
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; NOGATHER-NEXT: .LBB5_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT: .LBB5_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT: .LBB5_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
@@ -317,10 +308,9 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT: .LBB5_8: # %else8
; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0
; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; NOGATHER-NEXT: vmovaps %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
entry:
@@ -357,86 +347,81 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_2: # %else
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_4: # %else2
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT: vmovq %xmm5, %rax
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm5
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_6: # %else5
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_8: # %else8
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_12
; NOGATHER-NEXT: # %bb.11: # %cond.load13
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_12: # %else14
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_14
; NOGATHER-NEXT: # %bb.13: # %cond.load16
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_14: # %else17
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_16
; NOGATHER-NEXT: # %bb.15: # %cond.load19
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_16: # %else20
; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <8 x i32*>, <8 x i32*>* %ptr
@@ -473,87 +458,82 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT: .LBB7_2: # %else
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_4: # %else2
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5
; NOGATHER-NEXT: vmovq %xmm5, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_6: # %else5
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_8: # %else8
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm5
; NOGATHER-NEXT: vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_12
; NOGATHER-NEXT: # %bb.11: # %cond.load13
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_12: # %else14
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_14
; NOGATHER-NEXT: # %bb.13: # %cond.load16
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_14: # %else17
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_16
; NOGATHER-NEXT: # %bb.15: # %cond.load19
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_16: # %else20
; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3
; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <8 x float*>, <8 x float*>* %ptr
@@ -585,50 +565,44 @@ define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i6
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB8_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm4
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm3
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB8_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm4, %xmm4
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB8_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm3, %xmm3
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB8_8: # %else8
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <4 x i64*>, <4 x i64*>* %ptr
@@ -660,50 +634,44 @@ define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0],ymm1[1,2,3]
; NOGATHER-NEXT: .LBB9_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0]
; NOGATHER-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm1[0],mem[0]
; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3]
; NOGATHER-NEXT: .LBB9_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
; NOGATHER-NEXT: vmovq %xmm4, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1]
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm3 = mem[0],xmm3[1]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB9_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB9_8: # %else8
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; NOGATHER-NEXT: vmovapd %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <4 x double*>, <4 x double*>* %ptr
@@ -733,24 +701,22 @@ define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i6
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB10_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT: .LBB10_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB10_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT: .LBB10_4: # %else2
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x i64*>, <2 x i64*>* %ptr
@@ -780,24 +746,22 @@ define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB11_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm3, %rax
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; NOGATHER-NEXT: vmovq %xmm2, %rax
; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; NOGATHER-NEXT: .LBB11_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB11_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; NOGATHER-NEXT: .LBB11_4: # %else2
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; NOGATHER-NEXT: vmovapd %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x double*>, <2 x double*>* %ptr


@@ -1658,38 +1658,35 @@ declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kmovw %k1, %eax
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: # implicit-def: $xmm0
; KNL_64-NEXT: je .LBB31_2
; KNL_64-NEXT: # %bb.1: # %cond.load
; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_64-NEXT: vmovq %xmm0, %rax
; KNL_64-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_2: # %else
; KNL_64-NEXT: kshiftrw $1, %k1, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: kshiftrw $1, %k0, %k1
; KNL_64-NEXT: kmovw %k1, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB31_4
; KNL_64-NEXT: # %bb.3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; KNL_64-NEXT: vpextrq $1, %xmm0, %rax
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_4: # %else2
; KNL_64-NEXT: kshiftrw $2, %k1, %k0
; KNL_64-NEXT: kshiftrw $2, %k0, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB31_6
; KNL_64-NEXT: # %bb.5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL_64-NEXT: vmovq %xmm0, %rax
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_6: # %else5
; KNL_64-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1}
; KNL_64-NEXT: vmovdqa %xmm3, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
@@ -1698,37 +1695,35 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_32: # %bb.0:
; KNL_32-NEXT: subl $12, %esp
; KNL_32-NEXT: .cfi_def_cfa_offset 16
; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2
; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32-NEXT: kmovw %k1, %eax
; KNL_32-NEXT: vmovdqa %xmm0, %xmm3
; KNL_32-NEXT: vpslld $31, %xmm2, %xmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; KNL_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: # implicit-def: $xmm1
; KNL_32-NEXT: je .LBB31_2
; KNL_32-NEXT: # %bb.1: # %cond.load
; KNL_32-NEXT: vmovd %xmm2, %eax
; KNL_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL_32-NEXT: vmovd %xmm1, %eax
; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_2: # %else
; KNL_32-NEXT: kshiftrw $1, %k1, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: kshiftrw $1, %k0, %k1
; KNL_32-NEXT: kmovw %k1, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB31_4
; KNL_32-NEXT: # %bb.3: # %cond.load1
; KNL_32-NEXT: vpextrd $1, %xmm2, %eax
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_4: # %else2
; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; KNL_32-NEXT: kshiftrw $2, %k1, %k0
; KNL_32-NEXT: kshiftrw $2, %k0, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB31_6
; KNL_32-NEXT: # %bb.5: # %cond.load4
; KNL_32-NEXT: vpextrd $2, %xmm2, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_6: # %else5
; KNL_32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_32-NEXT: addl $12, %esp
; KNL_32-NEXT: .cfi_def_cfa_offset 4
; KNL_32-NEXT: vzeroupper
@@ -1737,36 +1732,34 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX-LABEL: test30:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
; SKX-NEXT: vpmovd2m %xmm2, %k1
; SKX-NEXT: kmovw %k1, %eax
; SKX-NEXT: vpmovd2m %xmm2, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: testb $1, %al
; SKX-NEXT: # implicit-def: $xmm0
; SKX-NEXT: je .LBB31_2
; SKX-NEXT: # %bb.1: # %cond.load
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_2: # %else
; SKX-NEXT: kshiftrw $1, %k1, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: kshiftrw $1, %k0, %k1
; SKX-NEXT: kmovw %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB31_4
; SKX-NEXT: # %bb.3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm1, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_4: # %else2
; SKX-NEXT: kshiftrw $2, %k1, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB31_6
; SKX-NEXT: # %bb.5: # %cond.load4
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vmovq %xmm0, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_6: # %else5
; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
; SKX-NEXT: vmovdqa %xmm3, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -1775,36 +1768,35 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX_32: # %bb.0:
; SKX_32-NEXT: subl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 16
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
; SKX_32-NEXT: vpmovd2m %xmm2, %k1
; SKX_32-NEXT: kmovw %k1, %eax
; SKX_32-NEXT: vmovdqa %xmm0, %xmm3
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm0
; SKX_32-NEXT: vpmovd2m %xmm0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; SKX_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: # implicit-def: $xmm1
; SKX_32-NEXT: je .LBB31_2
; SKX_32-NEXT: # %bb.1: # %cond.load
; SKX_32-NEXT: vmovd %xmm2, %eax
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX_32-NEXT: vmovd %xmm1, %eax
; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_2: # %else
; SKX_32-NEXT: kshiftrw $1, %k1, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: kshiftrw $1, %k0, %k1
; SKX_32-NEXT: kmovw %k1, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB31_4
; SKX_32-NEXT: # %bb.3: # %cond.load1
; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_4: # %else2
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; SKX_32-NEXT: kshiftrw $2, %k1, %k0
; SKX_32-NEXT: kshiftrw $2, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB31_6
; SKX_32-NEXT: # %bb.5: # %cond.load4
; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_6: # %else5
; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 4
; SKX_32-NEXT: retl


@@ -8,10 +8,10 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass
; CHECK: cond.load:
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ]
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i32 1
; CHECK-NEXT: br i1 [[MASK1]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
@@ -20,9 +20,8 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[LOAD1]], i32 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
; CHECK-NEXT: [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -32,12 +31,11 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru)
; CHECK-LABEL: @scalarize_v2i64_ones_mask(
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i32 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 true, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -45,8 +43,7 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru)
define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64*> %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -56,9 +53,8 @@ define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64*> %p, <2 x i64> %passthru)
; CHECK-LABEL: @scalarize_v2i64_const_mask(
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD1]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i32 1
; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret