forked from OSchip/llvm-project
[Reassociate] Cleanup minor missed optimizations
While analyzing issue #56483, it was noticed that running `opt` with `-reassociate` was missing some minor optimizations. For example, there were cases where running `opt` on IR with floating-point instructions that have the `fast` flag applied sometimes resulted in less efficient code than the input IR (things like dead instructions left behind, and missed reassociations). These were sometimes noted in the test files with TODOs, to investigate further. This commit fixes some of these problems, removing some TODOs in the process. For the record, I refer to these as "minor" missed optimizations because, when running a full clang/llvm compilation, these inefficiencies do not occur, as other passes clean that residue up. Regardless, having cleaner IR produced by `opt` makes assessing the quality of fixes done in `opt` easier.
This commit is contained in:
parent
486787210d
commit
230c8c56f2
|
@ -449,7 +449,8 @@ using RepeatedValue = std::pair<Value*, APInt>;
|
|||
/// of the expression) if it can turn them into binary operators of the right
|
||||
/// type and thus make the expression bigger.
|
||||
static bool LinearizeExprTree(Instruction *I,
|
||||
SmallVectorImpl<RepeatedValue> &Ops) {
|
||||
SmallVectorImpl<RepeatedValue> &Ops,
|
||||
ReassociatePass::OrderedSet &ToRedo) {
|
||||
assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
|
||||
"Expected a UnaryOperator or BinaryOperator!");
|
||||
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
|
||||
|
@ -577,18 +578,27 @@ static bool LinearizeExprTree(Instruction *I,
|
|||
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
|
||||
|
||||
// If this is a multiply expression, turn any internal negations into
|
||||
// multiplies by -1 so they can be reassociated.
|
||||
if (Instruction *Tmp = dyn_cast<Instruction>(Op))
|
||||
if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
|
||||
(Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
|
||||
Tmp = LowerNegateToMultiply(Tmp);
|
||||
LLVM_DEBUG(dbgs() << *Tmp << '\n');
|
||||
Worklist.push_back(std::make_pair(Tmp, Weight));
|
||||
Changed = true;
|
||||
continue;
|
||||
// multiplies by -1 so they can be reassociated. Add any users of the
|
||||
// newly created multiplication by -1 to the redo list, so any
|
||||
// reassociation opportunities that are exposed will be reassociated
|
||||
// further.
|
||||
Instruction *Neg;
|
||||
if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
|
||||
(Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
|
||||
match(Op, m_Instruction(Neg))) {
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
|
||||
Instruction *Mul = LowerNegateToMultiply(Neg);
|
||||
LLVM_DEBUG(dbgs() << *Mul << '\n');
|
||||
Worklist.push_back(std::make_pair(Mul, Weight));
|
||||
for (User *U : Mul->users()) {
|
||||
if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
|
||||
ToRedo.insert(UserBO);
|
||||
}
|
||||
ToRedo.insert(Neg);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Failed to morph into an expression of the right type. This really is
|
||||
// a leaf.
|
||||
|
@ -1141,7 +1151,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
|
|||
return nullptr;
|
||||
|
||||
SmallVector<RepeatedValue, 8> Tree;
|
||||
MadeChange |= LinearizeExprTree(BO, Tree);
|
||||
MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
|
||||
SmallVector<ValueEntry, 8> Factors;
|
||||
Factors.reserve(Tree.size());
|
||||
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
|
||||
|
@ -2320,7 +2330,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
|
|||
// First, walk the expression tree, linearizing the tree, collecting the
|
||||
// operand information.
|
||||
SmallVector<RepeatedValue, 8> Tree;
|
||||
MadeChange |= LinearizeExprTree(I, Tree);
|
||||
MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
|
||||
SmallVector<ValueEntry, 8> Ops;
|
||||
Ops.reserve(Tree.size());
|
||||
for (const RepeatedValue &E : Tree)
|
||||
|
|
|
@ -281,11 +281,10 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a)
|
|||
|
||||
define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
|
||||
; CHECK-LABEL: @test10(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer
|
||||
; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
|
||||
; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x float> [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
|
||||
; CHECK-NEXT: ret <2 x float> [[TMP1]]
|
||||
;
|
||||
%d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
|
||||
%c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
|
||||
|
@ -296,7 +295,6 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
|
|||
|
||||
define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
|
||||
; CHECK-LABEL: @test10_unary_fneg(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast <2 x float> zeroinitializer
|
||||
; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
|
||||
; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]]
|
||||
; CHECK-NEXT: ret <2 x float> [[F]]
|
||||
|
|
|
@ -181,16 +181,12 @@ define float @test6_reassoc(float %A, float %B, float %C) {
|
|||
}
|
||||
|
||||
; (-X)*Y + Z -> Z-X*Y
|
||||
; TODO: check why IR transformation of test7 with 'fast' math flag
|
||||
; is worse than without it (and even without transformation)
|
||||
|
||||
define float @test7(float %X, float %Y, float %Z) {
|
||||
; CHECK-LABEL: @test7(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
|
||||
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%A = fsub fast float 0.0, %X
|
||||
%B = fmul fast float %A, %Y
|
||||
|
@ -200,11 +196,9 @@ define float @test7(float %X, float %Y, float %Z) {
|
|||
|
||||
define float @test7_unary_fneg(float %X, float %Y, float %Z) {
|
||||
; CHECK-LABEL: @test7_unary_fneg(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
|
||||
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%A = fneg fast float %X
|
||||
%B = fmul fast float %A, %Y
|
||||
|
@ -239,6 +233,22 @@ define float @test7_reassoc(float %X, float %Y, float %Z) {
|
|||
ret float %C
|
||||
}
|
||||
|
||||
; Integer version of:
|
||||
; (-X)*Y + Z -> Z-X*Y
|
||||
; TODO: check if we can change the mul of -1 and the add to a sub.
|
||||
define i32 @test7_int(i32 %X, i32 %Y, i32 %Z) {
|
||||
; CHECK-LABEL: @test7_int(
|
||||
; CHECK-NEXT: [[A:%.*]] = mul i32 [[Y:%.*]], [[X:%.*]]
|
||||
; CHECK-NEXT: [[B:%.*]] = mul i32 [[A]], -1
|
||||
; CHECK-NEXT: [[C:%.*]] = add i32 [[B]], [[Z:%.*]]
|
||||
; CHECK-NEXT: ret i32 [[C]]
|
||||
;
|
||||
%A = sub i32 0, %X
|
||||
%B = mul i32 %A, %Y
|
||||
%C = add i32 %B, %Z
|
||||
ret i32 %C
|
||||
}
|
||||
|
||||
define float @test8(float %X) {
|
||||
; CHECK-LABEL: @test8(
|
||||
; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 9.400000e+01
|
||||
|
@ -276,7 +286,6 @@ define float @test10(float %W) {
|
|||
|
||||
define float @test11(float %X) {
|
||||
; CHECK-LABEL: @test11(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
|
||||
; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], -3.000000e+00
|
||||
; CHECK-NEXT: [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00
|
||||
; CHECK-NEXT: ret float [[Z]]
|
||||
|
@ -289,17 +298,12 @@ define float @test11(float %X) {
|
|||
ret float %Z
|
||||
}
|
||||
|
||||
; TODO: check why IR transformation of test12 with 'fast' math flag
|
||||
; is worse than without it (and even without transformation)
|
||||
|
||||
define float @test12(float %X1, float %X2, float %X3) {
|
||||
; CHECK-LABEL: @test12(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
|
||||
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
|
||||
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%A = fsub fast float 0.000000e+00, %X1
|
||||
%B = fmul fast float %A, %X2 ; -X1*X2
|
||||
|
@ -310,12 +314,10 @@ define float @test12(float %X1, float %X2, float %X3) {
|
|||
|
||||
define float @test12_unary_fneg(float %X1, float %X2, float %X3) {
|
||||
; CHECK-LABEL: @test12_unary_fneg(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
|
||||
; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
|
||||
; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
|
||||
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%A = fneg fast float %X1
|
||||
%B = fmul fast float %A, %X2 ; -X1*X2
|
||||
|
@ -490,12 +492,11 @@ define float @test15_reassoc(float %b, float %a) {
|
|||
|
||||
define float @test16(float %a, float %b, float %z) {
|
||||
; CHECK-LABEL: @test16(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
|
||||
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
|
||||
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[F]], 0.000000e+00
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[F]], 0.000000e+00
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%c = fsub fast float 0.000000e+00, %z
|
||||
%d = fmul fast float %a, %b
|
||||
|
@ -507,7 +508,6 @@ define float @test16(float %a, float %b, float %z) {
|
|||
|
||||
define float @test16_unary_fneg(float %a, float %b, float %z) {
|
||||
; CHECK-LABEL: @test16_unary_fneg(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
|
||||
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
|
||||
; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[G:%.*]] = fmul fast float [[F]], [[Z:%.*]]
|
||||
|
@ -539,16 +539,14 @@ define float @test16_reassoc(float %a, float %b, float %z) {
|
|||
}
|
||||
|
||||
; TODO: check if we can remove:
|
||||
; - fsub fast 0, 0
|
||||
; - fadd fast x, 0
|
||||
; ... as 'fast' implies 'nsz'
|
||||
define float @test17(float %a, float %b, float %z) {
|
||||
; CHECK-LABEL: @test17(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
|
||||
; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
|
||||
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[Z:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[E]], 0.000000e+00
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[E]], 0.000000e+00
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%d = fmul fast float %z, 4.000000e+01
|
||||
%c = fsub fast float 0.000000e+00, %d
|
||||
|
@ -557,10 +555,8 @@ define float @test17(float %a, float %b, float %z) {
|
|||
ret float %f
|
||||
}
|
||||
|
||||
; TODO: check if we can remove fneg fast 0 as 'fast' implies 'nsz'
|
||||
define float @test17_unary_fneg(float %a, float %b, float %z) {
|
||||
; CHECK-LABEL: @test17_unary_fneg(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00
|
||||
; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
|
||||
; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
|
||||
; CHECK-NEXT: ret float [[F]]
|
||||
|
|
|
@ -5,7 +5,6 @@ define float @wibble(float %tmp6) #0 {
|
|||
; CHECK-LABEL: @wibble(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP6:%.*]], -1.000000e+00
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fsub float -0.000000e+00, 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], 0xFFF0000000000000
|
||||
; CHECK-NEXT: ret float [[TMP9]]
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue