[Reassociate] Clean up minor missed optimizations

While analyzing issue #56483, it was noticed that running `opt` with `-reassociate` was missing some minor optimizations. For example, there were cases where running `opt` on IR containing floating-point instructions with the `fast` flag applied sometimes resulted in less efficient code than the input IR (e.g., dead instructions left behind and missed reassociations). Some of these cases were noted in the test files with TODOs for further investigation. This commit fixes some of these problems, removing some TODOs in the process. FTR, I refer to these as "minor" missed optimizations because, when running a full clang/llvm compilation, these inefficiencies do not occur, as other passes clean up that residue. Regardless, having `opt` produce cleaner IR makes it easier to assess the quality of fixes made in `opt`.
2022-07-14 08:21:04 -07:00 · 2022-07-14 08:21:04 -07:00 · 230c8c56f2
parent 486787210d
commit 230c8c56f2
4 changed files with 58 additions and 55 deletions
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@ -449,7 +449,8 @@ using RepeatedValue = std::pair<Value*, APInt>;
 /// of the expression) if it can turn them into binary operators of the right
 /// type and thus make the expression bigger.
 static bool LinearizeExprTree(Instruction *I,
-                              SmallVectorImpl<RepeatedValue> &Ops) {
+                              SmallVectorImpl<RepeatedValue> &Ops,
+                              ReassociatePass::OrderedSet &ToRedo) {
  assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
         "Expected a UnaryOperator or BinaryOperator!");
  LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
@ -577,18 +578,27 @@ static bool LinearizeExprTree(Instruction *I,
      assert(Op->hasOneUse() && "Has uses outside the expression tree!");

      // If this is a multiply expression, turn any internal negations into
-      // multiplies by -1 so they can be reassociated.
-      if (Instruction *Tmp = dyn_cast<Instruction>(Op))
-        if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
-            (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
-          LLVM_DEBUG(dbgs()
-                     << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
-          Tmp = LowerNegateToMultiply(Tmp);
-          LLVM_DEBUG(dbgs() << *Tmp << '\n');
-          Worklist.push_back(std::make_pair(Tmp, Weight));
-          Changed = true;
-          continue;
+      // multiplies by -1 so they can be reassociated.  Add any users of the
+      // newly created multiplication by -1 to the redo list, so any
+      // reassociation opportunities that are exposed will be reassociated
+      // further.
+      Instruction *Neg;
+      if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
+           (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
+           match(Op, m_Instruction(Neg))) {
+        LLVM_DEBUG(dbgs()
+                   << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
+        Instruction *Mul = LowerNegateToMultiply(Neg);
+        LLVM_DEBUG(dbgs() << *Mul << '\n');
+        Worklist.push_back(std::make_pair(Mul, Weight));
+        for (User *U : Mul->users()) {
+          if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
+            ToRedo.insert(UserBO);
        }
+        ToRedo.insert(Neg);
+        Changed = true;
+        continue;
+      }

      // Failed to morph into an expression of the right type.  This really is
      // a leaf.
@ -1141,7 +1151,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
    return nullptr;

  SmallVector<RepeatedValue, 8> Tree;
-  MadeChange |= LinearizeExprTree(BO, Tree);
+  MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
  SmallVector<ValueEntry, 8> Factors;
  Factors.reserve(Tree.size());
  for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
@ -2320,7 +2330,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  SmallVector<RepeatedValue, 8> Tree;
-  MadeChange |= LinearizeExprTree(I, Tree);
+  MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
  SmallVector<ValueEntry, 8> Ops;
  Ops.reserve(Tree.size());
  for (const RepeatedValue &E : Tree)
--- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
@ -281,11 +281,10 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a)

 define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
-; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
 ;
  %d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
  %c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
@ -296,7 +295,6 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {

 define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast <2 x float> zeroinitializer
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x float> [[F]]
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@ -181,16 +181,12 @@ define float @test6_reassoc(float %A, float %B, float %C) {
 }

 ; (-X)*Y + Z -> Z-X*Y
-; TODO: check why IR transformation of test7 with 'fast' math flag
-; is worse than without it (and even without transformation)

 define float @test7(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %A = fsub fast float 0.0, %X
  %B = fmul fast float %A, %Y
@ -200,11 +196,9 @@ define float @test7(float %X, float %Y, float %Z) {

 define float @test7_unary_fneg(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %A = fneg fast float %X
  %B = fmul fast float %A, %Y
@ -239,6 +233,22 @@ define float @test7_reassoc(float %X, float %Y, float %Z) {
  ret float %C
 }

+; Integer version of:
+;   (-X)*Y + Z -> Z-X*Y
+; TODO: check if we can change the mul of -1 and the add to a sub.
+define i32 @test7_int(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test7_int(
+; CHECK-NEXT:    [[A:%.*]] = mul i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[A]], -1
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[B]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = sub i32 0, %X
+  %B = mul i32 %A, %Y
+  %C = add i32 %B, %Z
+  ret i32 %C
+}
+
 define float @test8(float %X) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 9.400000e+01
@ -276,7 +286,6 @@ define float @test10(float %W) {

 define float @test11(float %X) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
 ; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast float [[X:%.*]], -3.000000e+00
 ; CHECK-NEXT:    [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00
 ; CHECK-NEXT:    ret float [[Z]]
@ -289,17 +298,12 @@ define float @test11(float %X) {
  ret float %Z
 }

-; TODO: check why IR transformation of test12 with 'fast' math flag
-; is worse than without it (and even without transformation)
-
 define float @test12(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %A = fsub fast float 0.000000e+00, %X1
  %B = fmul fast float %A, %X2   ; -X1*X2
@ -310,12 +314,10 @@ define float @test12(float %X1, float %X2, float %X3) {

 define float @test12_unary_fneg(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test12_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %A = fneg fast float %X1
  %B = fmul fast float %A, %X2   ; -X1*X2
@ -490,12 +492,11 @@ define float @test15_reassoc(float %b, float %a) {

 define float @test16(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[C]], [[B:%.*]]
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[F]], 0.000000e+00
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F]], 0.000000e+00
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %c = fsub fast float 0.000000e+00, %z
  %d = fmul fast float %a, %b
@ -507,7 +508,6 @@ define float @test16(float %a, float %b, float %z) {

 define float @test16_unary_fneg(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test16_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[B:%.*]]
 ; CHECK-NEXT:    [[G:%.*]] = fmul fast float [[F]], [[Z:%.*]]
@ -539,16 +539,14 @@ define float @test16_reassoc(float %a, float %b, float %z) {
 }

 ; TODO: check if we can remove:
-; - fsub fast 0, 0
 ; - fadd fast x, 0
 ; ... as 'fast' implies 'nsz'
 define float @test17(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[C]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[E]], 0.000000e+00
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[E]], 0.000000e+00
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %d = fmul fast float %z, 4.000000e+01
  %c = fsub fast float 0.000000e+00, %d
@ -557,10 +555,8 @@ define float @test17(float %a, float %b, float %z) {
  ret float %f
 }

-; TODO: check if we can remove fneg fast 0 as 'fast' implies 'nsz'
 define float @test17_unary_fneg(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test17_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[F]]
--- a/llvm/test/Transforms/Reassociate/pr42349.ll
+++ b/llvm/test/Transforms/Reassociate/pr42349.ll
@ -5,7 +5,6 @@ define  float @wibble(float %tmp6) #0 {
 ; CHECK-LABEL: @wibble(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul float [[TMP6:%.*]], -1.000000e+00
-; CHECK-NEXT:    [[TMP0:%.*]] = fsub float -0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], 0xFFF0000000000000
 ; CHECK-NEXT:    ret float [[TMP9]]
 ;