[InstCombine] Re-commit: Fold 'check for [no] signed truncation' pattern
Summary:
[[ https://bugs.llvm.org/show_bug.cgi?id=38149 | PR38149 ]]

As discussed in https://reviews.llvm.org/D49179#1158957 and later, the IR for the
'check for [no] signed truncation' pattern can be improved:
https://rise4fun.com/Alive/gBf
^ that pattern will be produced by the Implicit Integer Truncation sanitizer
(https://reviews.llvm.org/D48958, https://bugs.llvm.org/show_bug.cgi?id=21530)
in the signed case, so it is probably a good idea to improve it.
DAGCombine will reverse this transform; see https://reviews.llvm.org/D49266.

This transform is surprisingly frustrating. It does not deal with non-splat shift
amounts, or with undef shift amounts. I've outlined what I think the solution
should be:
```
// Potential handling of non-splats: for each element:
//  * if both are undef, replace with constant 0.
//    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
//  * if both are not undef, and are different, bailout.
//  * else, only one is undef, then pick the non-undef one.
```

This is a re-commit: the original patch, committed in rL337190, was reverted in
rL337344 because it broke the Chromium build
(https://bugs.llvm.org/show_bug.cgi?id=38204 and https://crbug.com/864832).
Proofs that the fixed folds are OK: https://rise4fun.com/Alive/VYM

Differential Revision: https://reviews.llvm.org/D49320

llvm-svn: 337376
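To make the fold concrete: for i8 with MaskedBits = 5 (the @p0 tests below), KeptBits = 8 - 5 = 3, so AddCst = 1 << 2 = 4 and ICmpCst = 1 << 3 = 8, and the eq form `((x << 5) a>> 5) == x` becomes `(x + 4) u< 8`. Here is a standalone brute-force check of that single instance over all 256 i8 values; this is my own sanity sketch, not part of the patch, and it assumes signed `>>` is arithmetic (true on mainstream compilers, guaranteed only since C++20):
```
#include <cassert>
#include <cstdint>

int main() {
  // i8, MaskedBits = 5 => KeptBits = 3, AddCst = 4, ICmpCst = 8.
  for (int I = 0; I < 256; ++I) {
    int8_t X = (int8_t)I;
    // %tmp0 = shl i8 %x, 5 (shift done in unsigned math to avoid UB)
    int8_t Tmp0 = (int8_t)(uint8_t)((uint8_t)X << 5);
    // %tmp1 = ashr i8 %tmp0, 5 (relies on arithmetic right shift)
    int8_t Tmp1 = (int8_t)(Tmp0 >> 5);
    bool SrcForm = (Tmp1 == X);           // icmp eq i8 %tmp1, %x
    bool DstForm = (uint8_t)(X + 4) < 8;  // icmp ult (add i8 %x, 4), 8
    assert(SrcForm == DstForm);
  }
}
```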
parent 21813140f6
commit 3cb87e905c
@@ -2945,6 +2945,81 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
   return Builder.CreateICmp(DstPred, X, M);
 }
 
+/// Some comparisons can be simplified.
+/// In this case, we are looking for comparisons that look like
+/// a check for a lossy signed truncation.
+/// Folds:   (MaskedBits is a constant.)
+///   ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
+/// Into:
+///   (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
+/// Where KeptBits = bitwidth(%x) - MaskedBits
+static Value *
+foldICmpWithTruncSignExtendedVal(ICmpInst &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate SrcPred;
+  Value *X;
+  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
+  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
+  if (!match(&I, m_c_ICmp(SrcPred,
+                          m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
+                                          m_APInt(C1))),
+                          m_Deferred(X))))
+    return nullptr;
+
+  // Potential handling of non-splats: for each element:
+  //  * if both are undef, replace with constant 0.
+  //    Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
+  //  * if both are not undef, and are different, bailout.
+  //  * else, only one is undef, then pick the non-undef one.
+
+  // The shift amount must be equal.
+  if (*C0 != *C1)
+    return nullptr;
+  const APInt &MaskedBits = *C0;
+  assert(MaskedBits != 0 && "shift by zero should be folded away already.");
+
+  ICmpInst::Predicate DstPred;
+  switch (SrcPred) {
+  case ICmpInst::Predicate::ICMP_EQ:
+    // ((%x << MaskedBits) a>> MaskedBits) == %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_ULT;
+    break;
+  case ICmpInst::Predicate::ICMP_NE:
+    // ((%x << MaskedBits) a>> MaskedBits) != %x
+    //   =>
+    // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
+    DstPred = ICmpInst::Predicate::ICMP_UGE;
+    break;
+  // FIXME: are more folds possible?
+  default:
+    return nullptr;
+  }
+
+  auto *XType = X->getType();
+  const unsigned XBitWidth = XType->getScalarSizeInBits();
+  const APInt BitWidth = APInt(XBitWidth, XBitWidth);
+  assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
+
+  // KeptBits = bitwidth(%x) - MaskedBits
+  const APInt KeptBits = BitWidth - MaskedBits;
+  assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
+  // ICmpCst = (1 << KeptBits)
+  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
+  assert(ICmpCst.isPowerOf2());
+  // AddCst = (1 << (KeptBits-1))
+  const APInt AddCst = ICmpCst.lshr(1);
+  assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
+
+  // T0 = add %x, AddCst
+  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
+  // T1 = T0 DstPred ICmpCst
+  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
+
+  return T1;
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3285,6 +3360,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 
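The second hunk above wires the new fold into foldICmpBinOp(), right after the related low-bit-mask fold. In the test diffs below, the i65 @pb functions are the PR38204 case that got the first commit (rL337190) reverted; the re-committed code computes every constant as an APInt of the source type's width, so bit widths above 64 work. As an illustration (my own sketch, not part of the patch; it assumes an LLVM checkout for the APInt header), here is that constant computation traced for i65, where MaskedBits = 1:
```
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  // i65 test case: shl/ashr by 1, so MaskedBits = 1.
  const unsigned XBitWidth = 65;
  const APInt MaskedBits(XBitWidth, 1);
  const APInt BitWidth(XBitWidth, XBitWidth);    // 65
  const APInt KeptBits = BitWidth - MaskedBits;  // 64
  // ICmpCst = 1 << 64 -- only representable because the math is done in i65.
  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
  const APInt AddCst = ICmpCst.lshr(1);          // 1 << 63
  assert(ICmpCst.isPowerOf2() && AddCst.isPowerOf2());
  // 9223372036854775808 is exactly the add constant in the i65 CHECK lines.
  assert(AddCst == APInt(XBitWidth, 1).shl(63));
}
```
The i65 CHECK lines then show a sign test (`sgt -1` here, `slt 0` in the ne variant) rather than the unsigned compare the fold emits: for i65, 1 << 64 is exactly the sign-bit mask, and a separate InstCombine canonicalization turns an unsigned compare against the sign-bit mask into a sign test.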
@@ -15,9 +15,8 @@
 
 define i1 @p0(i8 %x) {
 ; CHECK-LABEL: @p0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5

@@ -29,9 +28,8 @@ define i1 @p0(i8 %x) {
 ; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
 define i1 @pb(i65 %x) {
 ; CHECK-LABEL: @pb(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i65 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i65 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i65 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i65 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i65 %x, 1

@@ -46,9 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 8, i8 8>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>

@@ -118,9 +115,8 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %x = call i8 @gen8()

@@ -140,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) {
 ; CHECK-LABEL: @n_oneuse0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5
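The hunks below are the second test file, covering the inverse predicate: `icmp ne`, which the fold maps to `ICMP_UGE`. The CHECK lines nevertheless show `icmp ugt ... 7` because InstCombine separately canonicalizes unsigned compares against constants into their strict forms (u>= 8 becomes u> 7). A quick standalone check of that ne instance for i8 (again my own sketch, not from the patch, with the same arithmetic-shift assumption as above):
```
#include <cassert>
#include <cstdint>

int main() {
  // ne form for i8, MaskedBits = 5: the documented result is (add %x, 4) u>= 8,
  // which the test expects in its canonical strict form, (add %x, 4) u> 7.
  for (int I = 0; I < 256; ++I) {
    int8_t X = (int8_t)I;
    int8_t Tmp0 = (int8_t)(uint8_t)((uint8_t)X << 5); // shl i8 %x, 5
    int8_t Tmp1 = (int8_t)(Tmp0 >> 5);                // ashr i8 %tmp0, 5
    bool SrcForm = (Tmp1 != X);                       // icmp ne i8 %tmp1, %x
    bool DstForm = (uint8_t)(X + 4) > 7;              // icmp ugt (add %x, 4), 7
    assert(SrcForm == DstForm);
  }
}
```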
@@ -15,9 +15,8 @@
 
 define i1 @p0(i8 %x) {
 ; CHECK-LABEL: @p0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5

@@ -29,9 +28,8 @@ define i1 @p0(i8 %x) {
 ; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
 define i1 @pb(i65 %x) {
 ; CHECK-LABEL: @pb(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i65 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i65 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i65 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i65 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i65 %x, 1

@@ -46,9 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 7, i8 7>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>

@@ -118,9 +115,8 @@ declare i8 @gen8()
 define i1 @c0() {
 ; CHECK-LABEL: @c0(
 ; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X]], 5
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %x = call i8 @gen8()

@@ -140,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) {
 ; CHECK-LABEL: @n_oneuse0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %tmp0 = shl i8 %x, 5