forked from OSchip/llvm-project
[X86][InstCombine] Add constant folding and simplification support for pdep and pext
The instructions use a mask to either pack disjoint bits together (pext) or spread bits to disjoint locations (pdep). If the mask is all zeros, then no bits are extracted or deposited. If the mask is all ones, then the source value is written to the result since no compression or expansion happens. Otherwise, if both the source and mask are constant, we can walk the bits in the source/mask and calculate the result. There are other crazier things we could do, like using computeKnownBits or turning pext into shift/and if only a single contiguous range of bits is extracted. Fixes PR44389. Differential Revision: https://reviews.llvm.org/D71952
This commit is contained in:
parent
1cc8a74de3
commit
374e0299cf
|
@ -2487,6 +2487,64 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
// TODO should we convert this to an AND if the RHS is constant?
|
||||
}
|
||||
break;
|
||||
case Intrinsic::x86_bmi_pext_32:
|
||||
case Intrinsic::x86_bmi_pext_64:
|
||||
if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
|
||||
if (MaskC->isNullValue())
|
||||
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
|
||||
if (MaskC->isAllOnesValue())
|
||||
return replaceInstUsesWith(CI, II->getArgOperand(0));
|
||||
|
||||
if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
|
||||
uint64_t Src = SrcC->getZExtValue();
|
||||
uint64_t Mask = MaskC->getZExtValue();
|
||||
uint64_t Result = 0;
|
||||
uint64_t BitToSet = 1;
|
||||
|
||||
while (Mask) {
|
||||
// Isolate lowest set bit.
|
||||
uint64_t BitToTest = Mask & -Mask;
|
||||
if (BitToTest & Src)
|
||||
Result |= BitToSet;
|
||||
|
||||
BitToSet <<= 1;
|
||||
// Clear lowest set bit.
|
||||
Mask &= Mask - 1;
|
||||
}
|
||||
|
||||
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Intrinsic::x86_bmi_pdep_32:
|
||||
case Intrinsic::x86_bmi_pdep_64:
|
||||
if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
|
||||
if (MaskC->isNullValue())
|
||||
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
|
||||
if (MaskC->isAllOnesValue())
|
||||
return replaceInstUsesWith(CI, II->getArgOperand(0));
|
||||
|
||||
if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
|
||||
uint64_t Src = SrcC->getZExtValue();
|
||||
uint64_t Mask = MaskC->getZExtValue();
|
||||
uint64_t Result = 0;
|
||||
uint64_t BitToTest = 1;
|
||||
|
||||
while (Mask) {
|
||||
// Isolate lowest set bit.
|
||||
uint64_t BitToSet = Mask & -Mask;
|
||||
if (BitToTest & Src)
|
||||
Result |= BitToSet;
|
||||
|
||||
BitToTest <<= 1;
|
||||
// Clear lowest set bit.
|
||||
Mask &= Mask - 1;
|
||||
}
|
||||
|
||||
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_vcvtph2ps_128:
|
||||
case Intrinsic::x86_vcvtph2ps_256: {
|
||||
|
|
|
@ -7,6 +7,10 @@ declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
|
|||
declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
|
||||
declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
|
||||
declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
|
||||
declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
|
||||
declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
|
||||
declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
|
||||
declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
|
||||
|
||||
define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32(
|
||||
|
@ -269,3 +273,131 @@ define i64 @test_x86_bmi_bzhi_64_constfold() nounwind readnone {
|
|||
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pext_32_zero_mask(i32 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_32_zero_mask(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 0)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pext_64_zero_mask(i64 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_64_zero_mask(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 0)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pext_32_allones_mask(i32 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_32_allones_mask(
|
||||
; CHECK-NEXT: ret i32 %x
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 -1)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pext_64_allones_mask(i64 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_64_allones_mask(
|
||||
; CHECK-NEXT: ret i64 %x
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 -1)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pext_32_constant_fold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_32_constant_fold(
|
||||
; CHECK-NEXT: ret i32 30001
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4042322160)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pext_64_constant_fold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_64_constant_fold(
|
||||
; CHECK-NEXT: ret i64 1966210489
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -1085102592571150096)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pext_32_constant_fold_2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_32_constant_fold_2(
|
||||
; CHECK-NEXT: ret i32 30224
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pext.32(i32 1985229328, i32 4278190335)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pext_64_constant_fold_2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pext_64_constant_fold_2(
|
||||
; CHECK-NEXT: ret i64 1980816570
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pext.64(i64 8526495043095935640, i64 -72056498804490496)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pdep_32_zero_mask(i32 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_32_zero_mask(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 0)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pdep_64_zero_mask(i64 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_64_zero_mask(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 0)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pdep_32_allones_mask(i32 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_32_allones_mask(
|
||||
; CHECK-NEXT: ret i32 %x
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 -1)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pdep_64_allones_mask(i64 %x) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_64_allones_mask(
|
||||
; CHECK-NEXT: ret i64 %x
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 -1)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pdep_32_constant_fold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_32_constant_fold(
|
||||
; CHECK-NEXT: ret i32 807407616
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4042322160)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pdep_64_constant_fold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_64_constant_fold(
|
||||
; CHECK-NEXT: ret i64 -1089641583808049024
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -1085102592571150096)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i32 @test_x86_pdep_32_constant_fold_2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_32_constant_fold_2(
|
||||
; CHECK-NEXT: ret i32 838860816
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.pdep.32(i32 1985229328, i32 4278190335)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
define i64 @test_x86_pdep_64_constant_fold_2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_pdep_64_constant_fold_2(
|
||||
; CHECK-NEXT: ret i64 -144114243170822144
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.pdep.64(i64 8526495043095935640, i64 -72056498804490496)
|
||||
ret i64 %1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue