[PatternMatch] Match XOR variant of unsigned-add overflow check.

InstCombine folds (a + b <u a) to (a ^ -1 <u b), and the folded form does
not match the pattern expected by UAddWithOverflow in CodeGenPrepare.
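
For illustration, the two equivalent overflow checks in IR (a minimal
sketch; value names are illustrative):

  ; the check as written in the source: (a + b) u< a
  %add = add i64 %a, %b
  %cmp = icmp ult i64 %add, %a

  ; the same check after the InstCombine fold: (a ^ -1) u< b
  %not  = xor i64 %a, -1
  %cmp2 = icmp ult i64 %not, %b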

This causes a regression relative to Clang 7 on both X86 and AArch64:
https://gcc.godbolt.org/z/juhXYV

This patch extends UAddWithOverflow to also catch the XOR case, provided
the XOR is used only in the ICMP. This covers just a single case, but I'd
like to make sure I am not missing anything before tackling the other
cases.

Reviewers: nikic, RKSimon, lebedev.ri, spatel

Reviewed By: nikic, lebedev.ri

Differential Revision: https://reviews.llvm.org/D74228

commit e01a3d49c2
parent ff4639f060
Author: Florian Hahn
Date:   2020-02-19 14:37:30 +01:00

5 changed files with 54 additions and 27 deletions

llvm/include/llvm/IR/PatternMatch.h

@@ -1674,7 +1674,8 @@ m_UnordFMin(const LHS &L, const RHS &R) {
 }
 
 //===----------------------------------------------------------------------===//
-// Matchers for overflow check patterns: e.g. (a + b) u< a
+// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
+// Note that S might be matched to other instructions than AddInst.
 //
 
 template <typename LHS_t, typename RHS_t, typename Sum_t>
@@ -1705,6 +1706,19 @@ struct UAddWithOverflow_match {
     if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
       return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+    Value *Op1;
+    auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
+    // (a ^ -1) <u b
+    if (Pred == ICmpInst::ICMP_ULT) {
+      if (XorExpr.match(ICmpLHS))
+        return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
+    }
+    // b >u (a ^ -1)
+    if (Pred == ICmpInst::ICMP_UGT) {
+      if (XorExpr.match(ICmpRHS))
+        return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
+    }
 
     // Match special-case for increment-by-1.
     if (Pred == ICmpInst::ICMP_EQ) {
       // (a + 1) == 0

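For reference, the compare shapes the extended matcher accepts, sketched
in IR (names illustrative; in each form the xor's only user must be the
compare, per the m_OneUse above):

  ; form 1: (a ^ -1) u< b
  %x1 = xor i64 %a, -1
  %c1 = icmp ult i64 %x1, %b

  ; form 2, commuted compare: b u> (a ^ -1)
  %x2 = xor i64 %a, -1
  %c2 = icmp ugt i64 %b, %x2
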
llvm/lib/CodeGen/CodeGenPrepare.cpp

@@ -399,7 +399,8 @@ class TypePromotionTransaction;
     bool simplifyOffsetableRelocate(Instruction &I);
     bool tryToSinkFreeOperands(Instruction *I);
-    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
+                                     Value *Arg1, CmpInst *Cmp,
                                      Intrinsic::ID IID);
     bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
     bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
@@ -1185,6 +1186,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 }
 
 bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+                                                 Value *Arg0, Value *Arg1,
                                                  CmpInst *Cmp,
                                                  Intrinsic::ID IID) {
   if (BO->getParent() != Cmp->getParent()) {
@@ -1202,8 +1204,6 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   }
 
   // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
-  Value *Arg0 = BO->getOperand(0);
-  Value *Arg1 = BO->getOperand(1);
   if (BO->getOpcode() == Instruction::Add &&
       IID == Intrinsic::usub_with_overflow) {
     assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
@@ -1222,12 +1222,16 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   IRBuilder<> Builder(InsertPt);
   Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
-  Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+  if (BO->getOpcode() != Instruction::Xor) {
+    Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+    BO->replaceAllUsesWith(Math);
+  } else
+    assert(BO->hasOneUse() &&
+           "Patterns with XOr should use the BO only in the compare");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
-  BO->replaceAllUsesWith(Math);
   Cmp->replaceAllUsesWith(OV);
-  BO->eraseFromParent();
   Cmp->eraseFromParent();
+  BO->eraseFromParent();
   return true;
 }
@@ -1267,9 +1271,13 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
                                                bool &ModifiedDT) {
   Value *A, *B;
   BinaryOperator *Add;
-  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
     if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
       return false;
+    // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
+    A = Add->getOperand(0);
+    B = Add->getOperand(1);
+  }
 
   if (!TLI->shouldFormOverflowOp(ISD::UADDO,
                                  TLI->getValueType(*DL, Add->getType()),
@@ -1282,7 +1290,8 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
   if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
+                                   Intrinsic::uadd_with_overflow))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
@@ -1344,7 +1353,8 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
                       Sub->hasNUsesOrMore(2)))
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+                                   Cmp, Intrinsic::usub_with_overflow))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.

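Putting the CodeGenPrepare pieces together, a sketch of the rewrite for
the xor form (illustrative values; compare the test updates below):

  ; before: the xor feeds only the compare
  %x   = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b

  ; after: only the overflow bit is materialized; because BO is the xor
  ; rather than an add, no "math" extract is created and the xor is
  ; simply erased once the compare has been replaced
  %ov0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %ov  = extractvalue { i64, i1 } %ov0, 1
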
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

@@ -5568,8 +5568,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       isa<IntegerType>(A->getType())) {
     Value *Result;
     Constant *Overflow;
-    if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
-                              *AddI, Result, Overflow)) {
+    // m_UAddWithOverflow can match patterns that do not include an explicit
+    // "add" instruction, so check the opcode of the matched op.
+    if (AddI->getOpcode() == Instruction::Add &&
+        OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
+                              Result, Overflow)) {
       replaceInstUsesWith(*AddI, Result);
       return replaceInstUsesWith(I, Overflow);
     }

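The new opcode check matters because the widened matcher can bind AddI to
an instruction that is not an add; a sketch of such a case (illustrative
names):

  ; m_UAddWithOverflow binds AddI to the xor here. InstCombine must not
  ; pass the xor to OptimizeOverflowCheck as if it were the add, so the
  ; pattern is left for CodeGenPrepare to transform instead.
  %x   = xor i32 %a, -1
  %cmp = icmp ult i32 %x, %b
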
llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll

@@ -102,9 +102,9 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %x = xor i64 %a, -1
@@ -115,13 +115,13 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
 
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
-  %x = xor i64 -1, %a
-  %cmp = icmp ult i64 %x, %b
+  %x = xor i64 %a, -1
+  %cmp = icmp ugt i64 %b, %x
   %Q = select i1 %cmp, i64 %b, i64 42
   ret i64 %Q
 }

llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll

@@ -153,9 +153,9 @@ exit:
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %x = xor i64 %a, -1
@@ -166,12 +166,12 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
 
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
-  %x = xor i64 -1, %a
+  %x = xor i64 %a, -1
   %cmp = icmp ult i64 %x, %b
   %Q = select i1 %cmp, i64 %b, i64 42
   ret i64 %Q
 }