[InstCombine] (A + B) + B --> A + (B << 1)

This eliminates a use of 'B', so it can enable follow-on transforms
as well as improve analysis/codegen.

The PhaseOrdering test was added for D61726, and it shows
the limits of instcombine compared to real reassociation: we
would need to run some form of CSE to collapse the repeated
shifts further.

The intermediate value is deliberately given a name ("reass.add")
because the test at llvm/test/Bitcode/value-with-long-name.ll
would break with the usual nameless value. I'm not sure how to
make that test more robust.

The name may also help when debugging regressions if, for example,
this change exposes weaknesses in the reassociation pass.
This commit is contained in:
Sanjay Patel 2020-05-22 11:37:58 -04:00
parent b603794061
commit 2f7c24fe30
4 changed files with 36 additions and 43 deletions

View File

@ -1303,6 +1303,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
return BinaryOperator::CreateSub(A, B);
// (A + RHS) + RHS --> A + (RHS << 1)
if (match(LHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(RHS)))))
return BinaryOperator::CreateAdd(A, Builder.CreateShl(RHS, 1, "reass.add"));
// LHS + (A + LHS) --> A + (LHS << 1)
if (match(RHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(LHS)))))
return BinaryOperator::CreateAdd(A, Builder.CreateShl(LHS, 1, "reass.add"));
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);

View File

@ -228,12 +228,10 @@ define <2 x i1> @test11vec(<2 x i8> %a) {
ret <2 x i1> %c
}
; Should be transformed into shl x, 1?
define i8 @reassoc_shl1(i8 %x, i8 %y) {
; CHECK-LABEL: @reassoc_shl1(
; CHECK-NEXT: [[A:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = add i8 [[A]], [[X]]
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i8 [[X:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = add i8 [[REASS_ADD]], [[Y:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%a = add i8 %y, %x
@ -243,8 +241,8 @@ define i8 @reassoc_shl1(i8 %x, i8 %y) {
define <2 x i8> @reassoc_shl1_commute1(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @reassoc_shl1_commute1(
; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A]], [[X]]
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], <i8 1, i8 1>
; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[REASS_ADD]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%a = add <2 x i8> %x, %y
@ -256,8 +254,8 @@ define i8 @reassoc_shl1_commute2(i8 %px, i8 %py) {
; CHECK-LABEL: @reassoc_shl1_commute2(
; CHECK-NEXT: [[X:%.*]] = sdiv i8 42, [[PX:%.*]]
; CHECK-NEXT: [[Y:%.*]] = sdiv i8 43, [[PY:%.*]]
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[Y]], [[X]]
; CHECK-NEXT: [[R:%.*]] = add i8 [[X]], [[A]]
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i8 [[X]], 1
; CHECK-NEXT: [[R:%.*]] = add i8 [[Y]], [[REASS_ADD]]
; CHECK-NEXT: ret i8 [[R]]
;
%x = sdiv i8 42, %px ; thwart complexity-based canonicalization
@ -271,8 +269,8 @@ define i8 @reassoc_shl1_commute3(i8 %px, i8 %py) {
; CHECK-LABEL: @reassoc_shl1_commute3(
; CHECK-NEXT: [[X:%.*]] = sdiv i8 42, [[PX:%.*]]
; CHECK-NEXT: [[Y:%.*]] = sdiv i8 43, [[PY:%.*]]
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = add i8 [[X]], [[A]]
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i8 [[X]], 1
; CHECK-NEXT: [[R:%.*]] = add i8 [[Y]], [[REASS_ADD]]
; CHECK-NEXT: ret i8 [[R]]
;
%x = sdiv i8 42, %px ; thwart complexity-based canonicalization
@ -855,8 +853,8 @@ define <2 x i64> @test41vec_and_multiuse(<2 x i32> %a) {
; CHECK-LABEL: @test41vec_and_multiuse(
; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 16, i32 16>
; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[ADD]] to <2 x i64>
; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i64> [[ZEXT]], <i64 -1, i64 -1>
; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[SUB]], [[ZEXT]]
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], <i64 1, i64 1>
; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], <i64 -1, i64 -1>
; CHECK-NEXT: ret <2 x i64> [[EXTRAUSE]]
;
%add = add nuw <2 x i32> %a, <i32 16, i32 16>

View File

@ -2006,24 +2006,11 @@ bb10:
define i64 @test_chr_22(i1 %i, i64* %j, i64 %v0) !prof !14 {
; CHECK-LABEL: @test_chr_22(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[V1:%.*]] = add i64 [[V0:%.*]], 3
; CHECK-NEXT: [[V2:%.*]] = add i64 [[V1]], [[V0]]
; CHECK-NEXT: [[C1:%.*]] = icmp slt i64 [[V2]], 100
; CHECK-NEXT: [[V300:%.*]] = mul i64 [[V2]], -8647960034816487527
; CHECK-NEXT: [[V301:%.*]] = icmp ne i64 [[V300]], 100
; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[C1]], [[V301]]
; CHECK-NEXT: br i1 [[TMP0]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15
; CHECK: bb0.split:
; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i64 [[V0:%.*]], 1
; CHECK-NEXT: [[V2:%.*]] = add i64 [[REASS_ADD]], 3
; CHECK-NEXT: [[V299:%.*]] = mul i64 [[V2]], 7860086430977039991
; CHECK-NEXT: store i64 [[V299]], i64* [[J:%.*]], align 4
; CHECK-NEXT: ret i64 99
; CHECK: bb0.split.nonchr:
; CHECK-NEXT: [[V300_NONCHR:%.*]] = mul i64 [[V2]], -8647960034816487527
; CHECK-NEXT: [[V301_NONCHR:%.*]] = icmp eq i64 [[V300_NONCHR]], 100
; CHECK-NEXT: [[V302_NONCHR_V:%.*]] = select i1 [[V301_NONCHR]], i64 1938697607916024098, i64 7860086430977039991, !prof !16
; CHECK-NEXT: [[V302_NONCHR:%.*]] = mul i64 [[V2]], [[V302_NONCHR_V]]
; CHECK-NEXT: store i64 [[V302_NONCHR]], i64* [[J]], align 4
; CHECK-NEXT: ret i64 99
;
bb0:
%v1 = add i64 %v0, 3

View File

@ -42,14 +42,14 @@ define dso_local i64 @func(i64 %blah, i64 %limit) #0 {
; OLDPM-NEXT: [[K_05:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[AND]], [[FOR_BODY]] ]
; OLDPM-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ]
; OLDPM-NEXT: [[AND]] = and i64 [[CONV]], [[K_05]]
; OLDPM-NEXT: [[ADD:%.*]] = add i64 [[AND]], [[G_06]]
; OLDPM-NEXT: [[ADD_1:%.*]] = add i64 [[AND]], [[ADD]]
; OLDPM-NEXT: [[ADD_2:%.*]] = add i64 [[AND]], [[ADD_1]]
; OLDPM-NEXT: [[ADD_3:%.*]] = add i64 [[AND]], [[ADD_2]]
; OLDPM-NEXT: [[ADD_4:%.*]] = add i64 [[AND]], [[ADD_3]]
; OLDPM-NEXT: [[ADD_5:%.*]] = add i64 [[AND]], [[ADD_4]]
; OLDPM-NEXT: [[ADD_6:%.*]] = add i64 [[AND]], [[ADD_5]]
; OLDPM-NEXT: [[ADD_7]] = add i64 [[AND]], [[ADD_6]]
; OLDPM-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw i64 [[AND]], 1
; OLDPM-NEXT: [[ADD_1:%.*]] = add i64 [[G_06]], [[REASS_ADD]]
; OLDPM-NEXT: [[REASS_ADD9:%.*]] = shl nuw nsw i64 [[AND]], 1
; OLDPM-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_1]], [[REASS_ADD9]]
; OLDPM-NEXT: [[REASS_ADD10:%.*]] = shl nuw nsw i64 [[AND]], 1
; OLDPM-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_3]], [[REASS_ADD10]]
; OLDPM-NEXT: [[REASS_ADD11:%.*]] = shl nuw nsw i64 [[AND]], 1
; OLDPM-NEXT: [[ADD_7]] = add i64 [[ADD_5]], [[REASS_ADD11]]
; OLDPM-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8
; OLDPM-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
; OLDPM-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
@ -89,14 +89,14 @@ define dso_local i64 @func(i64 %blah, i64 %limit) #0 {
; NEWPM-NEXT: [[G_06:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ]
; NEWPM-NEXT: [[AND_PHI]] = phi i64 [ [[AND_0]], [[FOR_BODY_LR_PH_NEW]] ], [ [[AND_1:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
; NEWPM-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
; NEWPM-NEXT: [[ADD:%.*]] = add i64 [[AND_PHI]], [[G_06]]
; NEWPM-NEXT: [[ADD_1:%.*]] = add i64 [[AND_PHI]], [[ADD]]
; NEWPM-NEXT: [[ADD_2:%.*]] = add i64 [[AND_PHI]], [[ADD_1]]
; NEWPM-NEXT: [[ADD_3:%.*]] = add i64 [[AND_PHI]], [[ADD_2]]
; NEWPM-NEXT: [[ADD_4:%.*]] = add i64 [[AND_PHI]], [[ADD_3]]
; NEWPM-NEXT: [[ADD_5:%.*]] = add i64 [[AND_PHI]], [[ADD_4]]
; NEWPM-NEXT: [[ADD_6:%.*]] = add i64 [[AND_PHI]], [[ADD_5]]
; NEWPM-NEXT: [[ADD_7]] = add i64 [[AND_PHI]], [[ADD_6]]
; NEWPM-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1
; NEWPM-NEXT: [[ADD_1:%.*]] = add i64 [[G_06]], [[REASS_ADD]]
; NEWPM-NEXT: [[REASS_ADD9:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1
; NEWPM-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_1]], [[REASS_ADD9]]
; NEWPM-NEXT: [[REASS_ADD10:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1
; NEWPM-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_3]], [[REASS_ADD10]]
; NEWPM-NEXT: [[REASS_ADD11:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1
; NEWPM-NEXT: [[ADD_7]] = add i64 [[ADD_5]], [[REASS_ADD11]]
; NEWPM-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8
; NEWPM-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
; NEWPM-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]