forked from OSchip/llvm-project
[X86][InstCombine] Add basic simplification support for BEXTR/BEXTRI intrinsics.
This patch adds simplification support for the BEXTR/BEXTRI intrinsics to match gcc. This only supports cases that fold to 0 or can be fully constant folded. Theoretically we could support converting to AND if the shift part is unused, or to only a shift if the mask doesn't modify any bits after an equivalent lshr. gcc doesn't do these transformations either. I put this in InstCombine, but it could be done in InstSimplify, where it would be the first target-specific intrinsic. Differential Revision: https://reviews.llvm.org/D36063 llvm-svn: 309603
This commit is contained in:
parent
1b4e9ae384
commit
8324003818
|
@ -2248,6 +2248,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_bmi_bextr_32:
|
||||
case Intrinsic::x86_bmi_bextr_64:
|
||||
case Intrinsic::x86_tbm_bextri_u32:
|
||||
case Intrinsic::x86_tbm_bextri_u64:
|
||||
// If the RHS is a constant we can try some simplifications.
|
||||
if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
|
||||
uint64_t Shift = C->getZExtValue();
|
||||
uint64_t Length = (Shift >> 8) & 0xff;
|
||||
Shift &= 0xff;
|
||||
unsigned BitWidth = II->getType()->getIntegerBitWidth();
|
||||
// If the length is 0 or the shift is out of range, replace with zero.
|
||||
if (Length == 0 || Shift >= BitWidth)
|
||||
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
|
||||
// If the LHS is also a constant, we can completely constant fold this.
|
||||
if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
|
||||
uint64_t Result = InC->getZExtValue() >> Shift;
|
||||
if (Length > BitWidth)
|
||||
Length = BitWidth;
|
||||
Result &= maskTrailingOnes<uint64_t>(Length);
|
||||
return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
|
||||
}
|
||||
// TODO should we turn this into 'and' if shift is 0? Or 'lshr' if we
|
||||
// are only masking bits that a shift already cleared?
|
||||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_vcvtph2ps_128:
|
||||
case Intrinsic::x86_vcvtph2ps_256: {
|
||||
auto Arg = II->getArgOperand(0);
|
||||
|
|
|
@ -0,0 +1,203 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
; Declarations of the four intrinsics exercised below; each takes two integer
; operands of the same width as its result.
declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone
|
||||
declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone
|
||||
declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
|
||||
declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
|
||||
|
||||
; Variable source with control 1296 (0x0510: start bit 16, length 5).
; No fold is expected here: the call must survive instcombine unchanged.
define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.tbm.bextri.u32(i32 [[A:%.*]], i32 1296)
|
||||
; CHECK-NEXT: ret i32 [[TMP1]]
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1296)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Control 1 (0x0001: start bit 1, length 0). A zero length extracts no bits,
; so the call is expected to fold to the constant 0.
define i32 @test_x86_tbm_bextri_u32_zero_length(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32_zero_length(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Control 288 (0x0120: start bit 32, length 1). The start bit is outside the
; 32-bit operand, so the call is expected to fold to the constant 0.
define i32 @test_x86_tbm_bextri_u32_large_shift(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32_large_shift(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 288)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Variable source with control 1312 (0x0520: start bit 32, length 5).
; No fold is expected here: the call must survive instcombine unchanged.
define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.tbm.bextri.u64(i64 [[A:%.*]], i64 1312)
|
||||
; CHECK-NEXT: ret i64 [[TMP1]]
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1312)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Control 1 (0x0001: start bit 1, length 0). A zero length extracts no bits,
; so the call is expected to fold to the constant 0.
define i64 @test_x86_tbm_bextri_u64_zero_length(i64 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u64_zero_length(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Control 320 (0x0140: start bit 64, length 1). The start bit is outside the
; 64-bit operand, so the call is expected to fold to the constant 0.
define i64 @test_x86_tbm_bextri_u64_large_shift(i64 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u64_large_shift(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 320)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 4112 (0x1010: start bit 16, length 16)
; applied to 0xDEADBEEF is expected to fold to 0xDEAD = 57005.
define i32 @test_x86_tbm_bextri_u32_constfold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold(
|
||||
; CHECK-NEXT: ret i32 57005
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 8196 (0x2004: start bit 4, length 32) asks
; for bits past the top of the 32-bit source; the expected fold is
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i32 @test_x86_tbm_bextri_u32_constfold2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold2(
|
||||
; CHECK-NEXT: ret i32 233495534
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 16388 (0x4004: start bit 4, length 64) has a
; length field larger than the 32-bit operand; the expected fold is still
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i32 @test_x86_tbm_bextri_u32_constfold3() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold3(
|
||||
; CHECK-NEXT: ret i32 233495534
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 4112 (0x1010: start bit 16, length 16)
; applied to 0xDEADBEEF is expected to fold to 0xDEAD = 57005.
define i64 @test_x86_tbm_bextri_u64_constfold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold(
|
||||
; CHECK-NEXT: ret i64 57005
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 16388 (0x4004: start bit 4, length 64) asks
; for bits past the top of the 64-bit source; the expected fold is
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i64 @test_x86_tbm_bextri_u64_constfold2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold2(
|
||||
; CHECK-NEXT: ret i64 233495534
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 32772 (0x8004: start bit 4, length 128) has
; a length field larger than the 64-bit operand; the expected fold is still
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i64 @test_x86_tbm_bextri_u64_constfold3() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold3(
|
||||
; CHECK-NEXT: ret i64 233495534
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; BMI BEXTR with a variable source and control 1296 (0x0510: start bit 16,
; length 5). No fold is expected: the call must survive instcombine unchanged.
define i32 @test_x86_bmi_bextri_32(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.bmi.bextr.32(i32 [[A:%.*]], i32 1296)
|
||||
; CHECK-NEXT: ret i32 [[TMP1]]
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1296)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Control 1 (0x0001: start bit 1, length 0). A zero length extracts no bits,
; so the call is expected to fold to the constant 0.
define i32 @test_x86_bmi_bextri_32_zero_length(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_32_zero_length(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Control 288 (0x0120: start bit 32, length 1). The start bit is outside the
; 32-bit operand, so the call is expected to fold to the constant 0.
define i32 @test_x86_bmi_bextri_32_large_shift(i32 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_32_large_shift(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 288)
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; BMI BEXTR with a variable source and control 1312 (0x0520: start bit 32,
; length 5). No fold is expected: the call must survive instcombine unchanged.
define i64 @test_x86_bmi_bextri_64(i64 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_64(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.bmi.bextr.64(i64 [[A:%.*]], i64 1312)
|
||||
; CHECK-NEXT: ret i64 [[TMP1]]
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1312)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Control 1 (0x0001: start bit 1, length 0). A zero length extracts no bits,
; so the call is expected to fold to the constant 0.
define i64 @test_x86_bmi_bextri_64_zero_length(i64 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_64_zero_length(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Control 320 (0x0140: start bit 64, length 1). The start bit is outside the
; 64-bit operand, so the call is expected to fold to the constant 0.
define i64 @test_x86_bmi_bextri_64_large_shift(i64 %a) nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_64_large_shift(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 320)
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 4112 (0x1010: start bit 16, length 16)
; applied to 0xDEADBEEF is expected to fold to 0xDEAD = 57005.
define i32 @test_x86_bmi_bextri_32_constfold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold(
|
||||
; CHECK-NEXT: ret i32 57005
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 8196 (0x2004: start bit 4, length 32) asks
; for bits past the top of the 32-bit source; the expected fold is
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i32 @test_x86_bmi_bextri_32_constfold2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold2(
|
||||
; CHECK-NEXT: ret i32 233495534
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 16388 (0x4004: start bit 4, length 64) has a
; length field larger than the 32-bit operand; the expected fold is still
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i32 @test_x86_bmi_bextri_32_constfold3() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold3(
|
||||
; CHECK-NEXT: ret i32 233495534
|
||||
;
|
||||
%1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 4112 (0x1010: start bit 16, length 16)
; applied to 0xDEADBEEF is expected to fold to 0xDEAD = 57005.
define i64 @test_x86_bmi_bextri_64_constfold() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold(
|
||||
; CHECK-NEXT: ret i64 57005
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 16388 (0x4004: start bit 4, length 64) asks
; for bits past the top of the 64-bit source; the expected fold is
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i64 @test_x86_bmi_bextri_64_constfold2() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold2(
|
||||
; CHECK-NEXT: ret i64 233495534
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
; Both operands constant. Control 32772 (0x8004: start bit 4, length 128) has
; a length field larger than the 64-bit operand; the expected fold is still
; 0xDEADBEEF >> 4 = 0x0DEADBEE = 233495534.
define i64 @test_x86_bmi_bextri_64_constfold3() nounwind readnone {
|
||||
; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold3(
|
||||
; CHECK-NEXT: ret i64 233495534
|
||||
;
|
||||
%1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
|
||||
ret i64 %1
|
||||
}
|
Loading…
Reference in New Issue