From 6fa5e62c25f4523ab62ec0265afc3917b9d6c16b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 31 Jan 2019 16:40:07 +0000 Subject: [PATCH] [PatternMatch] add special-case uaddo matching for increment-by-one This is the most important uaddo problem mentioned in PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754 We were failing to match the canonicalized pattern when it's an 'add 1' operation. Pattern matching, however, shouldn't assume that we have canonicalized IR, so we match 4 commuted variants of uaddo. There's also a test with a crazy type to show that the existing CGP transform based on this matcher is not limited by target legality checks, but that's a different problem. Differential Revision: https://reviews.llvm.org/D57516 llvm-svn: 352766 --- llvm/include/llvm/IR/PatternMatch.h | 14 ++++++ .../test/CodeGen/X86/codegen-prepare-uaddo.ll | 4 +- .../CodeGenPrepare/overflow-intrinsics.ll | 45 ++++++++++--------- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 1f1a37d4304f..6c51d4877370 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1463,6 +1463,20 @@ struct UAddWithOverflow_match { if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); + // Match special-case for increment-by-1. + if (Pred == ICmpInst::ICMP_EQ) { + // (a + 1) == 0 + // (1 + a) == 0 + if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && + (m_One().match(AddLHS) || m_One().match(AddRHS))) + return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); + // 0 == (a + 1) + // 0 == (1 + a) + if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && + (m_One().match(AddLHS) || m_One().match(AddRHS))) + return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); + } + return false; } }; diff --git a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll index 52dfe1c8792b..1ac9eb24440d 100644 --- a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll +++ b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll @@ -42,9 +42,7 @@ define void @test_0(i64*, i64*) { define void @test_1(i64*, i64*) { ; CHECK-LABEL: test_1: ; CHECK: # %bb.0: -; CHECK-NEXT: incq (%rdi) -; CHECK-NEXT: sete %al -; CHECK-NEXT: addb $-1, %al +; CHECK-NEXT: addq $1, (%rdi) ; CHECK-NEXT: adcq $0, (%rsi) ; CHECK-NEXT: retq %3 = load i64, i64* %0, align 8 diff --git a/llvm/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll index d3d237689809..6b91a3b3c189 100644 --- a/llvm/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll +++ b/llvm/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll @@ -105,10 +105,11 @@ exit: define i1 @uaddo_i64_increment(i64 %x, i64* %p) { ; CHECK-LABEL: @uaddo_i64_increment( -; CHECK-NEXT: [[A:%.*]] = add i64 [[X:%.*]], 1 -; CHECK-NEXT: [[OV:%.*]] = icmp eq i64 [[A]], 0 -; CHECK-NEXT: store i64 [[A]], i64* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1) +; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i64, i1 } [[UADD_OVERFLOW]], 0 +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[UADD_OVERFLOW]], 1 +; CHECK-NEXT: store i64 [[UADD]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %a = add i64 %x, 1 %ov = icmp eq i64 %a, 0 @@ -118,10 +119,11 @@ define i1 @uaddo_i64_increment(i64 %x, i64* %p) { define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) { ; CHECK-LABEL: @uaddo_i8_increment_noncanonical_1( -; CHECK-NEXT: [[A:%.*]] = add i8 1, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp eq i8 [[A]], 0 -; CHECK-NEXT: store i8 [[A]], i8* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 1, i8 [[X:%.*]]) +; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i8, i1 } [[UADD_OVERFLOW]], 0 +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[UADD_OVERFLOW]], 1 +; CHECK-NEXT: store i8 [[UADD]], i8* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %a = add i8 1, %x ; commute %ov = icmp eq i8 %a, 0 @@ -131,10 +133,11 @@ define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) { define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) { ; CHECK-LABEL: @uaddo_i32_increment_noncanonical_2( -; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[OV:%.*]] = icmp eq i32 0, [[A]] -; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1) +; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i32, i1 } [[UADD_OVERFLOW]], 0 +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i32, i1 } [[UADD_OVERFLOW]], 1 +; CHECK-NEXT: store i32 [[UADD]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %a = add i32 %x, 1 %ov = icmp eq i32 0, %a ; commute @@ -144,10 +147,11 @@ define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) { define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) { ; CHECK-LABEL: @uaddo_i16_increment_noncanonical_3( -; CHECK-NEXT: [[A:%.*]] = add i16 1, [[X:%.*]] -; CHECK-NEXT: [[OV:%.*]] = icmp eq i16 0, [[A]] -; CHECK-NEXT: store i16 [[A]], i16* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 1, i16 [[X:%.*]]) +; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i16, i1 } [[UADD_OVERFLOW]], 0 +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i16, i1 } [[UADD_OVERFLOW]], 1 +; CHECK-NEXT: store i16 [[UADD]], i16* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %a = add i16 1, %x ; commute %ov = icmp eq i16 0, %a ; commute @@ -159,10 +163,11 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) { define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) { ; CHECK-LABEL: @uaddo_i42_increment_illegal_type( -; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1 -; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0 -; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]] -; CHECK-NEXT: ret i1 [[OV]] +; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1) +; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0 +; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1 +; CHECK-NEXT: store i42 [[UADD]], i42* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %a = add i42 %x, 1 %ov = icmp eq i42 %a, 0