[Reassociate] Cleanup minor missed optimizations

In analyzing issue #56483, it was noticed that running `opt` with
`-reassociate` was missing some minor optimizations. For example,
there were cases where the running `opt` on IR with floating-point
instructions that have the `fast` flags applied, sometimes resulted in
less efficient code than the input IR (things like dead instructions
left behind, and missed reassociations). These were sometimes noted
in the test-files with TODOs, to investigate further. This commit
fixes some of these problems, removing some TODOs in the process.

FTR, I refer to these as "minor" missed optimizations, because when
running a full clang/llvm compilation, these inefficiencies are not
happening, as other passes clean that residue up. Regardless, having
cleaner IR produced by `opt`, makes assessing the quality of fixes done
in `opt` easier.
This commit is contained in:
Warren Ristow 2022-07-14 08:21:04 -07:00
parent 486787210d
commit 230c8c56f2
4 changed files with 58 additions and 55 deletions

View File

@ -449,7 +449,8 @@ using RepeatedValue = std::pair<Value*, APInt>;
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
static bool LinearizeExprTree(Instruction *I,
SmallVectorImpl<RepeatedValue> &Ops) {
SmallVectorImpl<RepeatedValue> &Ops,
ReassociatePass::OrderedSet &ToRedo) {
assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
"Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
@ -577,18 +578,27 @@ static bool LinearizeExprTree(Instruction *I,
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
if (Instruction *Tmp = dyn_cast<Instruction>(Op))
if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
(Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
Tmp = LowerNegateToMultiply(Tmp);
LLVM_DEBUG(dbgs() << *Tmp << '\n');
Worklist.push_back(std::make_pair(Tmp, Weight));
Changed = true;
continue;
// multiplies by -1 so they can be reassociated. Add any users of the
// newly created multiplication by -1 to the redo list, so any
// reassociation opportunities that are exposed will be reassociated
// further.
Instruction *Neg;
if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
(Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
match(Op, m_Instruction(Neg))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
Instruction *Mul = LowerNegateToMultiply(Neg);
LLVM_DEBUG(dbgs() << *Mul << '\n');
Worklist.push_back(std::make_pair(Mul, Weight));
for (User *U : Mul->users()) {
if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
ToRedo.insert(UserBO);
}
ToRedo.insert(Neg);
Changed = true;
continue;
}
// Failed to morph into an expression of the right type. This really is
// a leaf.
@ -1141,7 +1151,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
SmallVector<ValueEntry, 8> Factors;
Factors.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
@ -2320,7 +2330,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(I, Tree);
MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
for (const RepeatedValue &E : Tree)

View File

@ -281,11 +281,10 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a)
define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer
; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
; CHECK-NEXT: ret <2 x float> [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
; CHECK-NEXT: ret <2 x float> [[TMP1]]
;
%d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
%c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
@ -296,7 +295,6 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
; CHECK-LABEL: @test10_unary_fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast <2 x float> zeroinitializer
; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]]
; CHECK-NEXT: ret <2 x float> [[F]]

View File

@ -181,16 +181,12 @@ define float @test6_reassoc(float %A, float %B, float %C) {
}
; (-X)*Y + Z -> Z-X*Y
; TODO: check why IR transformation of test7 with 'fast' math flag
; is worse than without it (and even without transformation)
define float @test7(float %X, float %Y, float %Z) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
; CHECK-NEXT: ret float [[TMP1]]
;
%A = fsub fast float 0.0, %X
%B = fmul fast float %A, %Y
@ -200,11 +196,9 @@ define float @test7(float %X, float %Y, float %Z) {
define float @test7_unary_fneg(float %X, float %Y, float %Z) {
; CHECK-LABEL: @test7_unary_fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
; CHECK-NEXT: ret float [[TMP1]]
;
%A = fneg fast float %X
%B = fmul fast float %A, %Y
@ -239,6 +233,22 @@ define float @test7_reassoc(float %X, float %Y, float %Z) {
ret float %C
}
; Integer version of:
; (-X)*Y + Z -> Z-X*Y
; TODO: check if we can change the mul of -1 and the add to a sub.
define i32 @test7_int(i32 %X, i32 %Y, i32 %Z) {
; CHECK-LABEL: @test7_int(
; CHECK-NEXT: [[A:%.*]] = mul i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[B:%.*]] = mul i32 [[A]], -1
; CHECK-NEXT: [[C:%.*]] = add i32 [[B]], [[Z:%.*]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = sub i32 0, %X
%B = mul i32 %A, %Y
%C = add i32 %B, %Z
ret i32 %C
}
define float @test8(float %X) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 9.400000e+01
@ -276,7 +286,6 @@ define float @test10(float %W) {
define float @test11(float %X) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], -3.000000e+00
; CHECK-NEXT: [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00
; CHECK-NEXT: ret float [[Z]]
@ -289,17 +298,12 @@ define float @test11(float %X) {
ret float %Z
}
; TODO: check why IR transformation of test12 with 'fast' math flag
; is worse than without it (and even without transformation)
define float @test12(float %X1, float %X2, float %X3) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
; CHECK-NEXT: ret float [[TMP1]]
;
%A = fsub fast float 0.000000e+00, %X1
%B = fmul fast float %A, %X2 ; -X1*X2
@ -310,12 +314,10 @@ define float @test12(float %X1, float %X2, float %X3) {
define float @test12_unary_fneg(float %X1, float %X2, float %X3) {
; CHECK-LABEL: @test12_unary_fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
; CHECK-NEXT: ret float [[TMP1]]
;
%A = fneg fast float %X1
%B = fmul fast float %A, %X2 ; -X1*X2
@ -490,12 +492,11 @@ define float @test15_reassoc(float %b, float %a) {
define float @test16(float %a, float %b, float %z) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[B:%.*]]
; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[F]], 0.000000e+00
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[F]], 0.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
%c = fsub fast float 0.000000e+00, %z
%d = fmul fast float %a, %b
@ -507,7 +508,6 @@ define float @test16(float %a, float %b, float %z) {
define float @test16_unary_fneg(float %a, float %b, float %z) {
; CHECK-LABEL: @test16_unary_fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[B:%.*]]
; CHECK-NEXT: [[G:%.*]] = fmul fast float [[F]], [[Z:%.*]]
@ -539,16 +539,14 @@ define float @test16_reassoc(float %a, float %b, float %z) {
}
; TODO: check if we can remove:
; - fsub fast 0, 0
; - fadd fast x, 0
; ... as 'fast' implies 'nsz'
define float @test17(float %a, float %b, float %z) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[E]], 0.000000e+00
; CHECK-NEXT: ret float [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[E]], 0.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
%d = fmul fast float %z, 4.000000e+01
%c = fsub fast float 0.000000e+00, %d
@ -557,10 +555,8 @@ define float @test17(float %a, float %b, float %z) {
ret float %f
}
; TODO: check if we can remove fneg fast 0 as 'fast' implies 'nsz'
define float @test17_unary_fneg(float %a, float %b, float %z) {
; CHECK-LABEL: @test17_unary_fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
; CHECK-NEXT: ret float [[F]]

View File

@ -5,7 +5,6 @@ define float @wibble(float %tmp6) #0 {
; CHECK-LABEL: @wibble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP6:%.*]], -1.000000e+00
; CHECK-NEXT: [[TMP0:%.*]] = fsub float -0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], 0xFFF0000000000000
; CHECK-NEXT: ret float [[TMP9]]
;