forked from OSchip/llvm-project
[ScalarizeMaskedMemIntrin] When expanding masked gathers, start with the passthru vector and insert the new load results into it.
Previously we started with undef and did a final merge with the passthru at the end.

llvm-svn: 343273
This commit is contained in:
parent
45ad631b4c
commit
6911bfe263
|
@ -368,10 +368,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
|
||||
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
|
||||
|
||||
Value *UndefVal = UndefValue::get(VecType);
|
||||
|
||||
// The result vector
|
||||
Value *VResult = UndefVal;
|
||||
Value *VResult = Src0;
|
||||
unsigned VectorWidth = VecType->getNumElements();
|
||||
|
||||
// Shorten the way if the mask is a vector of constants.
|
||||
|
@ -386,28 +384,17 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
VResult = Builder.CreateInsertElement(
|
||||
VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
|
||||
}
|
||||
Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
|
||||
CI->replaceAllUsesWith(NewI);
|
||||
CI->replaceAllUsesWith(VResult);
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
PHINode *Phi = nullptr;
|
||||
Value *PrevPhi = UndefVal;
|
||||
|
||||
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
|
||||
// Fill the "else" block, created in the previous iteration
|
||||
//
|
||||
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
|
||||
// br i1 %Mask1, label %cond.load, label %else
|
||||
//
|
||||
if (Idx > 0) {
|
||||
Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
|
||||
Phi->addIncoming(VResult, CondBlock);
|
||||
Phi->addIncoming(PrevPhi, PrevIfBlock);
|
||||
PrevPhi = Phi;
|
||||
VResult = Phi;
|
||||
}
|
||||
|
||||
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
|
||||
"Mask" + Twine(Idx));
|
||||
|
@ -425,8 +412,9 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
"Ptr" + Twine(Idx));
|
||||
LoadInst *Load =
|
||||
Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
|
||||
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
|
||||
"Res" + Twine(Idx));
|
||||
Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
|
||||
Builder.getInt32(Idx),
|
||||
"Res" + Twine(Idx));
|
||||
|
||||
// Create "else" block, fill it in the next iteration
|
||||
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
|
||||
|
@ -436,13 +424,14 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
OldBr->eraseFromParent();
|
||||
PrevIfBlock = IfBlock;
|
||||
IfBlock = NewIfBlock;
|
||||
|
||||
PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
|
||||
Phi->addIncoming(NewVResult, CondBlock);
|
||||
Phi->addIncoming(VResult, PrevIfBlock);
|
||||
VResult = Phi;
|
||||
}
|
||||
|
||||
Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
|
||||
Phi->addIncoming(VResult, CondBlock);
|
||||
Phi->addIncoming(PrevPhi, PrevIfBlock);
|
||||
Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
|
||||
CI->replaceAllUsesWith(NewI);
|
||||
CI->replaceAllUsesWith(VResult);
|
||||
CI->eraseFromParent();
|
||||
}
|
||||
|
||||
|
|
|
@ -30,25 +30,24 @@ define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i3
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2i32:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB0_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: movl (%rax), %eax
|
||||
; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
|
||||
; NOGATHER-NEXT: .LBB0_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB0_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: movl (%rax), %eax
|
||||
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
|
||||
; NOGATHER-NEXT: .LBB0_4: # %else2
|
||||
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <2 x i32*>, <2 x i32*>* %ptr
|
||||
|
@ -80,26 +79,24 @@ define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks,
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2i32_concat:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB1_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: movl (%rax), %eax
|
||||
; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
|
||||
; NOGATHER-NEXT: .LBB1_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB1_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: movl (%rax), %eax
|
||||
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
|
||||
; NOGATHER-NEXT: .LBB1_4: # %else2
|
||||
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <2 x i32*>, <2 x i32*>* %ptr
|
||||
|
@ -132,25 +129,23 @@ define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2float:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB2_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
|
||||
; NOGATHER-NEXT: .LBB2_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB2_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; NOGATHER-NEXT: .LBB2_4: # %else2
|
||||
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <2 x float*>, <2 x float*>* %ptr
|
||||
|
@ -180,25 +175,23 @@ define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %m
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2float_concat:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB3_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
|
||||
; NOGATHER-NEXT: .LBB3_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB3_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; NOGATHER-NEXT: .LBB3_4: # %else2
|
||||
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <2 x float*>, <2 x float*>* %ptr
|
||||
|
@ -229,27 +222,26 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3
|
|||
; NOGATHER-LABEL: masked_gather_v4i32:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm3
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB4_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rax
|
||||
; NOGATHER-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: .LBB4_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB4_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: .LBB4_4: # %else2
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB4_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: .LBB4_6: # %else5
|
||||
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
|
@ -257,10 +249,9 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3
|
|||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: .LBB4_8: # %else8
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0
|
||||
; NOGATHER-NEXT: vzeroupper
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
|
@ -289,27 +280,27 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <
|
|||
; NOGATHER-LABEL: masked_gather_v4float:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm3
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB5_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm0, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
|
||||
; NOGATHER-NEXT: .LBB5_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB5_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; NOGATHER-NEXT: .LBB5_4: # %else2
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB5_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; NOGATHER-NEXT: .LBB5_6: # %else5
|
||||
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
|
@ -317,10 +308,9 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <
|
|||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: .LBB5_8: # %else8
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; NOGATHER-NEXT: vmovaps %xmm2, %xmm0
|
||||
; NOGATHER-NEXT: vzeroupper
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
|
@ -357,86 +347,81 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v8i32:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $ymm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm1, %xmm4
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm5
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm4
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_4: # %else2
|
||||
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5
|
||||
; NOGATHER-NEXT: vmovq %xmm5, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm5
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm4
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_6: # %else5
|
||||
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_8
|
||||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm4
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm1, %xmm3
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB6_8: # %else8
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_10
|
||||
; NOGATHER-NEXT: # %bb.9: # %cond.load10
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm4, %xmm4
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB6_10: # %else11
|
||||
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_12
|
||||
; NOGATHER-NEXT: # %bb.11: # %cond.load13
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm4, %xmm4
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB6_12: # %else14
|
||||
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_14
|
||||
; NOGATHER-NEXT: # %bb.13: # %cond.load16
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm4, %xmm4
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB6_14: # %else17
|
||||
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB6_16
|
||||
; NOGATHER-NEXT: # %bb.15: # %cond.load19
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB6_16: # %else20
|
||||
; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
|
||||
; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <8 x i32*>, <8 x i32*>* %ptr
|
||||
|
@ -473,87 +458,82 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v8float:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $ymm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_4: # %else2
|
||||
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5
|
||||
; NOGATHER-NEXT: vmovq %xmm5, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_6: # %else5
|
||||
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_8
|
||||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB7_8: # %else8
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_10
|
||||
; NOGATHER-NEXT: # %bb.9: # %cond.load10
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm5
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB7_10: # %else11
|
||||
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_12
|
||||
; NOGATHER-NEXT: # %bb.11: # %cond.load13
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB7_12: # %else14
|
||||
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_14
|
||||
; NOGATHER-NEXT: # %bb.13: # %cond.load16
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB7_14: # %else17
|
||||
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB7_16
|
||||
; NOGATHER-NEXT: # %bb.15: # %cond.load19
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB7_16: # %else20
|
||||
; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
|
||||
; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <8 x float*>, <8 x float*>* %ptr
|
||||
|
@ -585,50 +565,44 @@ define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i6
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v4i64:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $ymm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB8_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm3
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB8_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB8_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm4
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm3
|
||||
; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; NOGATHER-NEXT: .LBB8_4: # %else2
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB8_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm4, %xmm4
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB8_6: # %else5
|
||||
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB8_8
|
||||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm3, %xmm3
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB8_8: # %else8
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3
|
||||
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
|
||||
; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <4 x i64*>, <4 x i64*>* %ptr
|
||||
|
@ -660,50 +634,44 @@ define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v4double:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $ymm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB9_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0],ymm1[1,2,3]
|
||||
; NOGATHER-NEXT: .LBB9_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB9_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0]
|
||||
; NOGATHER-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm1[0],mem[0]
|
||||
; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3]
|
||||
; NOGATHER-NEXT: .LBB9_4: # %else2
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB9_6
|
||||
; NOGATHER-NEXT: # %bb.5: # %cond.load4
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||
; NOGATHER-NEXT: vmovq %xmm4, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
|
||||
; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm3 = mem[0],xmm3[1]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB9_6: # %else5
|
||||
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB9_8
|
||||
; NOGATHER-NEXT: # %bb.7: # %cond.load7
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
|
||||
; NOGATHER-NEXT: .LBB9_8: # %else8
|
||||
; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3
|
||||
; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
|
||||
; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: vmovapd %ymm1, %ymm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <4 x double*>, <4 x double*>* %ptr
|
||||
|
@ -733,24 +701,22 @@ define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i6
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2i64:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB10_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
|
||||
; NOGATHER-NEXT: .LBB10_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB10_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm2
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm1
|
||||
; NOGATHER-NEXT: .LBB10_4: # %else2
|
||||
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <2 x i64*>, <2 x i64*>* %ptr
|
||||
|
@ -780,24 +746,22 @@ define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks
|
|||
;
|
||||
; NOGATHER-LABEL: masked_gather_v2double:
|
||||
; NOGATHER: # %bb.0: # %entry
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
|
||||
; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
|
||||
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; NOGATHER-NEXT: # implicit-def: $xmm2
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB11_2
|
||||
; NOGATHER-NEXT: # %bb.1: # %cond.load
|
||||
; NOGATHER-NEXT: vmovq %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; NOGATHER-NEXT: vmovq %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
|
||||
; NOGATHER-NEXT: .LBB11_2: # %else
|
||||
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; NOGATHER-NEXT: testb $1, %al
|
||||
; NOGATHER-NEXT: je .LBB11_4
|
||||
; NOGATHER-NEXT: # %bb.3: # %cond.load1
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
|
||||
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
|
||||
; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
|
||||
; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; NOGATHER-NEXT: .LBB11_4: # %else2
|
||||
; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: vmovapd %xmm1, %xmm0
|
||||
; NOGATHER-NEXT: retq
|
||||
entry:
|
||||
%ld = load <2 x double*>, <2 x double*>* %ptr
|
||||
|
|
|
@ -1658,38 +1658,35 @@ declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <
|
|||
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
|
||||
; KNL_64-LABEL: test30:
|
||||
; KNL_64: # %bb.0:
|
||||
; KNL_64-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
|
||||
; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; KNL_64-NEXT: kmovw %k1, %eax
|
||||
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0
|
||||
; KNL_64-NEXT: kmovw %k0, %eax
|
||||
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
|
||||
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; KNL_64-NEXT: testb $1, %al
|
||||
; KNL_64-NEXT: # implicit-def: $xmm0
|
||||
; KNL_64-NEXT: je .LBB31_2
|
||||
; KNL_64-NEXT: # %bb.1: # %cond.load
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_64-NEXT: vmovq %xmm0, %rax
|
||||
; KNL_64-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
|
||||
; KNL_64-NEXT: .LBB31_2: # %else
|
||||
; KNL_64-NEXT: kshiftrw $1, %k1, %k0
|
||||
; KNL_64-NEXT: kmovw %k0, %eax
|
||||
; KNL_64-NEXT: kshiftrw $1, %k0, %k1
|
||||
; KNL_64-NEXT: kmovw %k1, %eax
|
||||
; KNL_64-NEXT: testb $1, %al
|
||||
; KNL_64-NEXT: je .LBB31_4
|
||||
; KNL_64-NEXT: # %bb.3: # %cond.load1
|
||||
; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
|
||||
; KNL_64-NEXT: .LBB31_4: # %else2
|
||||
; KNL_64-NEXT: kshiftrw $2, %k1, %k0
|
||||
; KNL_64-NEXT: kshiftrw $2, %k0, %k0
|
||||
; KNL_64-NEXT: kmovw %k0, %eax
|
||||
; KNL_64-NEXT: testb $1, %al
|
||||
; KNL_64-NEXT: je .LBB31_6
|
||||
; KNL_64-NEXT: # %bb.5: # %cond.load4
|
||||
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; KNL_64-NEXT: vmovq %xmm0, %rax
|
||||
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
|
||||
; KNL_64-NEXT: .LBB31_6: # %else5
|
||||
; KNL_64-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1}
|
||||
; KNL_64-NEXT: vmovdqa %xmm3, %xmm0
|
||||
; KNL_64-NEXT: vzeroupper
|
||||
; KNL_64-NEXT: retq
|
||||
|
@ -1698,37 +1695,35 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; KNL_32: # %bb.0:
|
||||
; KNL_32-NEXT: subl $12, %esp
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 16
|
||||
; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; KNL_32-NEXT: kmovw %k1, %eax
|
||||
; KNL_32-NEXT: vmovdqa %xmm0, %xmm3
|
||||
; KNL_32-NEXT: vpslld $31, %xmm2, %xmm0
|
||||
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL_32-NEXT: kmovw %k0, %eax
|
||||
; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
|
||||
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
||||
; KNL_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: # implicit-def: $xmm1
|
||||
; KNL_32-NEXT: je .LBB31_2
|
||||
; KNL_32-NEXT: # %bb.1: # %cond.load
|
||||
; KNL_32-NEXT: vmovd %xmm2, %eax
|
||||
; KNL_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; KNL_32-NEXT: vmovd %xmm1, %eax
|
||||
; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: .LBB31_2: # %else
|
||||
; KNL_32-NEXT: kshiftrw $1, %k1, %k0
|
||||
; KNL_32-NEXT: kmovw %k0, %eax
|
||||
; KNL_32-NEXT: kshiftrw $1, %k0, %k1
|
||||
; KNL_32-NEXT: kmovw %k1, %eax
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: je .LBB31_4
|
||||
; KNL_32-NEXT: # %bb.3: # %cond.load1
|
||||
; KNL_32-NEXT: vpextrd $1, %xmm2, %eax
|
||||
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: .LBB31_4: # %else2
|
||||
; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
|
||||
; KNL_32-NEXT: kshiftrw $2, %k1, %k0
|
||||
; KNL_32-NEXT: kshiftrw $2, %k0, %k0
|
||||
; KNL_32-NEXT: kmovw %k0, %eax
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: je .LBB31_6
|
||||
; KNL_32-NEXT: # %bb.5: # %cond.load4
|
||||
; KNL_32-NEXT: vpextrd $2, %xmm2, %eax
|
||||
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: .LBB31_6: # %else5
|
||||
; KNL_32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
|
||||
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
||||
; KNL_32-NEXT: addl $12, %esp
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 4
|
||||
; KNL_32-NEXT: vzeroupper
|
||||
|
@ -1737,36 +1732,34 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX-LABEL: test30:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; SKX-NEXT: vpmovd2m %xmm2, %k1
|
||||
; SKX-NEXT: kmovw %k1, %eax
|
||||
; SKX-NEXT: vpmovd2m %xmm2, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
|
||||
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: # implicit-def: $xmm0
|
||||
; SKX-NEXT: je .LBB31_2
|
||||
; SKX-NEXT: # %bb.1: # %cond.load
|
||||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SKX-NEXT: vmovq %xmm0, %rax
|
||||
; SKX-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
|
||||
; SKX-NEXT: .LBB31_2: # %else
|
||||
; SKX-NEXT: kshiftrw $1, %k1, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: kshiftrw $1, %k0, %k1
|
||||
; SKX-NEXT: kmovw %k1, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: je .LBB31_4
|
||||
; SKX-NEXT: # %bb.3: # %cond.load1
|
||||
; SKX-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
|
||||
; SKX-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; SKX-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
|
||||
; SKX-NEXT: .LBB31_4: # %else2
|
||||
; SKX-NEXT: kshiftrw $2, %k1, %k0
|
||||
; SKX-NEXT: kshiftrw $2, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: je .LBB31_6
|
||||
; SKX-NEXT: # %bb.5: # %cond.load4
|
||||
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
|
||||
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; SKX-NEXT: vmovq %xmm0, %rax
|
||||
; SKX-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
|
||||
; SKX-NEXT: .LBB31_6: # %else5
|
||||
; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
|
||||
; SKX-NEXT: vmovdqa %xmm3, %xmm0
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
|
@ -1775,36 +1768,35 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX_32: # %bb.0:
|
||||
; SKX_32-NEXT: subl $12, %esp
|
||||
; SKX_32-NEXT: .cfi_def_cfa_offset 16
|
||||
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; SKX_32-NEXT: vpmovd2m %xmm2, %k1
|
||||
; SKX_32-NEXT: kmovw %k1, %eax
|
||||
; SKX_32-NEXT: vmovdqa %xmm0, %xmm3
|
||||
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm0
|
||||
; SKX_32-NEXT: vpmovd2m %xmm0, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
|
||||
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
||||
; SKX_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
|
||||
; SKX_32-NEXT: testb $1, %al
|
||||
; SKX_32-NEXT: # implicit-def: $xmm1
|
||||
; SKX_32-NEXT: je .LBB31_2
|
||||
; SKX_32-NEXT: # %bb.1: # %cond.load
|
||||
; SKX_32-NEXT: vmovd %xmm2, %eax
|
||||
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SKX_32-NEXT: vmovd %xmm1, %eax
|
||||
; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
|
||||
; SKX_32-NEXT: .LBB31_2: # %else
|
||||
; SKX_32-NEXT: kshiftrw $1, %k1, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: kshiftrw $1, %k0, %k1
|
||||
; SKX_32-NEXT: kmovw %k1, %eax
|
||||
; SKX_32-NEXT: testb $1, %al
|
||||
; SKX_32-NEXT: je .LBB31_4
|
||||
; SKX_32-NEXT: # %bb.3: # %cond.load1
|
||||
; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
|
||||
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
|
||||
; SKX_32-NEXT: .LBB31_4: # %else2
|
||||
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
|
||||
; SKX_32-NEXT: kshiftrw $2, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $2, %k0, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: testb $1, %al
|
||||
; SKX_32-NEXT: je .LBB31_6
|
||||
; SKX_32-NEXT: # %bb.5: # %cond.load4
|
||||
; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
|
||||
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
|
||||
; SKX_32-NEXT: .LBB31_6: # %else5
|
||||
; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
|
||||
; SKX_32-NEXT: addl $12, %esp
|
||||
; SKX_32-NEXT: .cfi_def_cfa_offset 4
|
||||
; SKX_32-NEXT: retl
|
||||
|
|
|
@ -8,10 +8,10 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass
|
|||
; CHECK: cond.load:
|
||||
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0
|
||||
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
|
||||
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0
|
||||
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0
|
||||
; CHECK-NEXT: br label [[ELSE]]
|
||||
; CHECK: else:
|
||||
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ]
|
||||
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
|
||||
; CHECK-NEXT: [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i32 1
|
||||
; CHECK-NEXT: br i1 [[MASK1]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
|
||||
; CHECK: cond.load1:
|
||||
|
@ -20,9 +20,8 @@ define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %pass
|
|||
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[LOAD1]], i32 1
|
||||
; CHECK-NEXT: br label [[ELSE2]]
|
||||
; CHECK: else2:
|
||||
; CHECK-NEXT: [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
|
||||
; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
|
||||
;
|
||||
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
|
||||
ret <2 x i64> %ret
|
||||
|
@ -32,12 +31,11 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru)
|
|||
; CHECK-LABEL: @scalarize_v2i64_ones_mask(
|
||||
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0
|
||||
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
|
||||
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0
|
||||
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i32 1
|
||||
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
|
||||
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i32 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 true, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[RES1]]
|
||||
;
|
||||
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
|
||||
ret <2 x i64> %ret
|
||||
|
@ -45,8 +43,7 @@ define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru)
|
|||
|
||||
define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64*> %p, <2 x i64> %passthru) {
|
||||
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
|
||||
;
|
||||
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
|
||||
ret <2 x i64> %ret
|
||||
|
@ -56,9 +53,8 @@ define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64*> %p, <2 x i64> %passthru)
|
|||
; CHECK-LABEL: @scalarize_v2i64_const_mask(
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 1
|
||||
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
|
||||
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD1]], i32 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
|
||||
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i32 1
|
||||
; CHECK-NEXT: ret <2 x i64> [[RES1]]
|
||||
;
|
||||
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
|
||||
ret <2 x i64> %ret
|
||||
|
|
Loading…
Reference in New Issue