forked from OSchip/llvm-project
[ScalarizeMaskedMemIntrin] Don't emit 'icmp eq i1 %x, 1' to check mask values. That's just %x so use that directly.
Had we emitted this IR earlier, InstCombine would have removed the icmp, so I'm going to assume that using the i1 directly would be considered canonical. llvm-svn: 343244
This commit is contained in:
parent
141f208e12
commit
0423681d4a
|
@ -181,8 +181,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
|
|||
//
|
||||
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
|
||||
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
|
||||
// %to_load = icmp eq i1 %mask_1, true
|
||||
// br i1 %to_load, label %cond.load, label %else
|
||||
// br i1 %mask_1, label %cond.load, label %else
|
||||
//
|
||||
if (Idx > 0) {
|
||||
Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
|
||||
|
@ -194,8 +193,6 @@ static void scalarizeMaskedLoad(CallInst *CI) {
|
|||
|
||||
Value *Predicate =
|
||||
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
|
||||
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
|
||||
ConstantInt::get(Predicate->getType(), 1));
|
||||
|
||||
// Create "cond" block
|
||||
//
|
||||
|
@ -216,7 +213,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
|
|||
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
|
||||
Builder.SetInsertPoint(InsertPt);
|
||||
Instruction *OldBr = IfBlock->getTerminator();
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
|
||||
OldBr->eraseFromParent();
|
||||
PrevIfBlock = IfBlock;
|
||||
IfBlock = NewIfBlock;
|
||||
|
@ -311,13 +308,10 @@ static void scalarizeMaskedStore(CallInst *CI) {
|
|||
// Fill the "else" block, created in the previous iteration
|
||||
//
|
||||
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
|
||||
// %to_store = icmp eq i1 %mask_1, true
|
||||
// br i1 %to_store, label %cond.store, label %else
|
||||
// br i1 %mask_1, label %cond.store, label %else
|
||||
//
|
||||
Value *Predicate =
|
||||
Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
|
||||
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
|
||||
ConstantInt::get(Predicate->getType(), 1));
|
||||
|
||||
// Create "cond" block
|
||||
//
|
||||
|
@ -339,7 +333,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
|
|||
CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
|
||||
Builder.SetInsertPoint(InsertPt);
|
||||
Instruction *OldBr = IfBlock->getTerminator();
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
|
||||
OldBr->eraseFromParent();
|
||||
IfBlock = NewIfBlock;
|
||||
}
|
||||
|
@ -430,8 +424,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
// Fill the "else" block, created in the previous iteration
|
||||
//
|
||||
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
|
||||
// %ToLoad1 = icmp eq i1 %Mask1, true
|
||||
// br i1 %ToLoad1, label %cond.load, label %else
|
||||
// br i1 %Mask1, label %cond.load, label %else
|
||||
//
|
||||
if (Idx > 0) {
|
||||
Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
|
||||
|
@ -443,9 +436,6 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
|
||||
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
|
||||
"Mask" + Twine(Idx));
|
||||
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
|
||||
ConstantInt::get(Predicate->getType(), 1),
|
||||
"ToLoad" + Twine(Idx));
|
||||
|
||||
// Create "cond" block
|
||||
//
|
||||
|
@ -467,7 +457,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
|
|||
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
|
||||
Builder.SetInsertPoint(InsertPt);
|
||||
Instruction *OldBr = IfBlock->getTerminator();
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
|
||||
OldBr->eraseFromParent();
|
||||
PrevIfBlock = IfBlock;
|
||||
IfBlock = NewIfBlock;
|
||||
|
@ -549,15 +539,11 @@ static void scalarizeMaskedScatter(CallInst *CI) {
|
|||
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
|
||||
// Fill the "else" block, created in the previous iteration
|
||||
//
|
||||
// % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
|
||||
// % ToStore = icmp eq i1 % Mask1, true
|
||||
// br i1 % ToStore, label %cond.store, label %else
|
||||
// %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
|
||||
// br i1 %Mask1, label %cond.store, label %else
|
||||
//
|
||||
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
|
||||
"Mask" + Twine(Idx));
|
||||
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
|
||||
ConstantInt::get(Predicate->getType(), 1),
|
||||
"ToStore" + Twine(Idx));
|
||||
|
||||
// Create "cond" block
|
||||
//
|
||||
|
@ -578,7 +564,7 @@ static void scalarizeMaskedScatter(CallInst *CI) {
|
|||
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
|
||||
Builder.SetInsertPoint(InsertPt);
|
||||
Instruction *OldBr = IfBlock->getTerminator();
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
|
||||
BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
|
||||
OldBr->eraseFromParent();
|
||||
IfBlock = NewIfBlock;
|
||||
}
|
||||
|
|
|
@ -70,8 +70,7 @@ declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> ,
|
|||
; SCALAR: else:
|
||||
; SCALAR-NEXT: %res.phi.else = phi
|
||||
; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1
|
||||
; SCALAR-NEXT: %ToLoad1 = icmp eq i1 %Mask1, true
|
||||
; SCALAR-NEXT: br i1 %ToLoad1, label %cond.load1, label %else2
|
||||
; SCALAR-NEXT: br i1 %Mask1, label %cond.load1, label %else2
|
||||
|
||||
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
|
||||
; KNL_64-LABEL: test2:
|
||||
|
@ -213,8 +212,7 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
|
|||
|
||||
; SCALAR-LABEL: test5
|
||||
; SCALAR: %Mask0 = extractelement <16 x i1> %imask, i32 0
|
||||
; SCALAR-NEXT: %ToStore0 = icmp eq i1 %Mask0, true
|
||||
; SCALAR-NEXT: br i1 %ToStore0, label %cond.store, label %else
|
||||
; SCALAR-NEXT: br i1 %Mask0, label %cond.store, label %else
|
||||
; SCALAR: cond.store:
|
||||
; SCALAR-NEXT: %Elt0 = extractelement <16 x i32> %val, i32 0
|
||||
; SCALAR-NEXT: %Ptr0 = extractelement <16 x i32*> %gep.random, i32 0
|
||||
|
@ -222,8 +220,7 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
|
|||
; SCALAR-NEXT: br label %else
|
||||
; SCALAR: else:
|
||||
; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1
|
||||
; SCALAR-NEXT: %ToStore1 = icmp eq i1 %Mask1, true
|
||||
; SCALAR-NEXT: br i1 %ToStore1, label %cond.store1, label %else2
|
||||
; SCALAR-NEXT: br i1 %Mask1, label %cond.store1, label %else2
|
||||
|
||||
define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
|
||||
; KNL_64-LABEL: test5:
|
||||
|
@ -2448,45 +2445,41 @@ define void @v1_scatter(<1 x i32>%a1, <1 x i32*> %ptr, <1 x i1> %mask) {
|
|||
; KNL_64-LABEL: v1_scatter:
|
||||
; KNL_64: # %bb.0:
|
||||
; KNL_64-NEXT: testb $1, %dl
|
||||
; KNL_64-NEXT: jne .LBB43_1
|
||||
; KNL_64-NEXT: # %bb.2: # %else
|
||||
; KNL_64-NEXT: retq
|
||||
; KNL_64-NEXT: .LBB43_1: # %cond.store
|
||||
; KNL_64-NEXT: je .LBB43_2
|
||||
; KNL_64-NEXT: # %bb.1: # %cond.store
|
||||
; KNL_64-NEXT: movl %edi, (%rsi)
|
||||
; KNL_64-NEXT: .LBB43_2: # %else
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: v1_scatter:
|
||||
; KNL_32: # %bb.0:
|
||||
; KNL_32-NEXT: testb $1, {{[0-9]+}}(%esp)
|
||||
; KNL_32-NEXT: jne .LBB43_1
|
||||
; KNL_32-NEXT: # %bb.2: # %else
|
||||
; KNL_32-NEXT: retl
|
||||
; KNL_32-NEXT: .LBB43_1: # %cond.store
|
||||
; KNL_32-NEXT: je .LBB43_2
|
||||
; KNL_32-NEXT: # %bb.1: # %cond.store
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; KNL_32-NEXT: movl %ecx, (%eax)
|
||||
; KNL_32-NEXT: .LBB43_2: # %else
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
; SKX-LABEL: v1_scatter:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: testb $1, %dl
|
||||
; SKX-NEXT: jne .LBB43_1
|
||||
; SKX-NEXT: # %bb.2: # %else
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: .LBB43_1: # %cond.store
|
||||
; SKX-NEXT: je .LBB43_2
|
||||
; SKX-NEXT: # %bb.1: # %cond.store
|
||||
; SKX-NEXT: movl %edi, (%rsi)
|
||||
; SKX-NEXT: .LBB43_2: # %else
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: v1_scatter:
|
||||
; SKX_32: # %bb.0:
|
||||
; SKX_32-NEXT: testb $1, {{[0-9]+}}(%esp)
|
||||
; SKX_32-NEXT: jne .LBB43_1
|
||||
; SKX_32-NEXT: # %bb.2: # %else
|
||||
; SKX_32-NEXT: retl
|
||||
; SKX_32-NEXT: .LBB43_1: # %cond.store
|
||||
; SKX_32-NEXT: je .LBB43_2
|
||||
; SKX_32-NEXT: # %bb.1: # %cond.store
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; SKX_32-NEXT: movl %ecx, (%eax)
|
||||
; SKX_32-NEXT: .LBB43_2: # %else
|
||||
; SKX_32-NEXT: retl
|
||||
call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask)
|
||||
ret void
|
||||
|
|
|
@ -131,12 +131,11 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
|
|||
; WIDEN_AVX2-NEXT: .LBB1_2: # %else
|
||||
; WIDEN_AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; WIDEN_AVX2-NEXT: testb $1, %al
|
||||
; WIDEN_AVX2-NEXT: jne .LBB1_3
|
||||
; WIDEN_AVX2-NEXT: # %bb.4: # %else2
|
||||
; WIDEN_AVX2-NEXT: retq
|
||||
; WIDEN_AVX2-NEXT: .LBB1_3: # %cond.store1
|
||||
; WIDEN_AVX2-NEXT: je .LBB1_4
|
||||
; WIDEN_AVX2-NEXT: # %bb.3: # %cond.store1
|
||||
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; WIDEN_AVX2-NEXT: vmovhpd %xmm0, (%rax)
|
||||
; WIDEN_AVX2-NEXT: .LBB1_4: # %else2
|
||||
; WIDEN_AVX2-NEXT: retq
|
||||
;
|
||||
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_index:
|
||||
|
@ -157,12 +156,11 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
|
|||
; PROMOTE_AVX2-NEXT: .LBB1_2: # %else
|
||||
; PROMOTE_AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; PROMOTE_AVX2-NEXT: testb $1, %al
|
||||
; PROMOTE_AVX2-NEXT: jne .LBB1_3
|
||||
; PROMOTE_AVX2-NEXT: # %bb.4: # %else2
|
||||
; PROMOTE_AVX2-NEXT: retq
|
||||
; PROMOTE_AVX2-NEXT: .LBB1_3: # %cond.store1
|
||||
; PROMOTE_AVX2-NEXT: je .LBB1_4
|
||||
; PROMOTE_AVX2-NEXT: # %bb.3: # %cond.store1
|
||||
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; PROMOTE_AVX2-NEXT: vmovhpd %xmm0, (%rax)
|
||||
; PROMOTE_AVX2-NEXT: .LBB1_4: # %else2
|
||||
; PROMOTE_AVX2-NEXT: retq
|
||||
%gep = getelementptr double, double *%base, <2 x i32> %ind
|
||||
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %gep, i32 4, <2 x i1> %mask)
|
||||
|
@ -284,12 +282,11 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
|
|||
; WIDEN_AVX2-NEXT: .LBB3_2: # %else
|
||||
; WIDEN_AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; WIDEN_AVX2-NEXT: testb $1, %al
|
||||
; WIDEN_AVX2-NEXT: jne .LBB3_3
|
||||
; WIDEN_AVX2-NEXT: # %bb.4: # %else2
|
||||
; WIDEN_AVX2-NEXT: retq
|
||||
; WIDEN_AVX2-NEXT: .LBB3_3: # %cond.store1
|
||||
; WIDEN_AVX2-NEXT: je .LBB3_4
|
||||
; WIDEN_AVX2-NEXT: # %bb.3: # %cond.store1
|
||||
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; WIDEN_AVX2-NEXT: vextractps $1, %xmm0, (%rax)
|
||||
; WIDEN_AVX2-NEXT: .LBB3_4: # %else2
|
||||
; WIDEN_AVX2-NEXT: retq
|
||||
;
|
||||
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data:
|
||||
|
@ -303,12 +300,11 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
|
|||
; PROMOTE_AVX2-NEXT: .LBB3_2: # %else
|
||||
; PROMOTE_AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; PROMOTE_AVX2-NEXT: testb $1, %al
|
||||
; PROMOTE_AVX2-NEXT: jne .LBB3_3
|
||||
; PROMOTE_AVX2-NEXT: # %bb.4: # %else2
|
||||
; PROMOTE_AVX2-NEXT: retq
|
||||
; PROMOTE_AVX2-NEXT: .LBB3_3: # %cond.store1
|
||||
; PROMOTE_AVX2-NEXT: je .LBB3_4
|
||||
; PROMOTE_AVX2-NEXT: # %bb.3: # %cond.store1
|
||||
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; PROMOTE_AVX2-NEXT: vextractps $2, %xmm0, (%rax)
|
||||
; PROMOTE_AVX2-NEXT: .LBB3_4: # %else2
|
||||
; PROMOTE_AVX2-NEXT: retq
|
||||
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
|
||||
ret void
|
||||
|
@ -438,12 +434,11 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
|
|||
; WIDEN_AVX2-NEXT: .LBB5_2: # %else
|
||||
; WIDEN_AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; WIDEN_AVX2-NEXT: testb $1, %al
|
||||
; WIDEN_AVX2-NEXT: jne .LBB5_3
|
||||
; WIDEN_AVX2-NEXT: # %bb.4: # %else2
|
||||
; WIDEN_AVX2-NEXT: retq
|
||||
; WIDEN_AVX2-NEXT: .LBB5_3: # %cond.store1
|
||||
; WIDEN_AVX2-NEXT: je .LBB5_4
|
||||
; WIDEN_AVX2-NEXT: # %bb.3: # %cond.store1
|
||||
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; WIDEN_AVX2-NEXT: vextractps $1, %xmm0, (%rax)
|
||||
; WIDEN_AVX2-NEXT: .LBB5_4: # %else2
|
||||
; WIDEN_AVX2-NEXT: retq
|
||||
;
|
||||
; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data_index:
|
||||
|
@ -464,12 +459,11 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
|
|||
; PROMOTE_AVX2-NEXT: .LBB5_2: # %else
|
||||
; PROMOTE_AVX2-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; PROMOTE_AVX2-NEXT: testb $1, %al
|
||||
; PROMOTE_AVX2-NEXT: jne .LBB5_3
|
||||
; PROMOTE_AVX2-NEXT: # %bb.4: # %else2
|
||||
; PROMOTE_AVX2-NEXT: retq
|
||||
; PROMOTE_AVX2-NEXT: .LBB5_3: # %cond.store1
|
||||
; PROMOTE_AVX2-NEXT: je .LBB5_4
|
||||
; PROMOTE_AVX2-NEXT: # %bb.3: # %cond.store1
|
||||
; PROMOTE_AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; PROMOTE_AVX2-NEXT: vextractps $2, %xmm0, (%rax)
|
||||
; PROMOTE_AVX2-NEXT: .LBB5_4: # %else2
|
||||
; PROMOTE_AVX2-NEXT: retq
|
||||
%gep = getelementptr i32, i32 *%base, <2 x i32> %ind
|
||||
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %gep, i32 4, <2 x i1> %mask)
|
||||
|
|
|
@ -66,21 +66,19 @@ define void @storev1(<1 x i32> %trigger, <1 x i32>* %addr, <1 x i32> %val) {
|
|||
; AVX-LABEL: storev1:
|
||||
; AVX: ## %bb.0:
|
||||
; AVX-NEXT: testl %edi, %edi
|
||||
; AVX-NEXT: je LBB1_1
|
||||
; AVX-NEXT: ## %bb.2: ## %else
|
||||
; AVX-NEXT: retq
|
||||
; AVX-NEXT: LBB1_1: ## %cond.store
|
||||
; AVX-NEXT: jne LBB1_2
|
||||
; AVX-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX-NEXT: movl %edx, (%rsi)
|
||||
; AVX-NEXT: LBB1_2: ## %else
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: storev1:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: testl %edi, %edi
|
||||
; AVX512-NEXT: je LBB1_1
|
||||
; AVX512-NEXT: ## %bb.2: ## %else
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-NEXT: LBB1_1: ## %cond.store
|
||||
; AVX512-NEXT: jne LBB1_2
|
||||
; AVX512-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512-NEXT: movl %edx, (%rsi)
|
||||
; AVX512-NEXT: LBB1_2: ## %else
|
||||
; AVX512-NEXT: retq
|
||||
%mask = icmp eq <1 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>%val, <1 x i32>* %addr, i32 4, <1 x i1>%mask)
|
||||
|
|
Loading…
Reference in New Issue