forked from OSchip/llvm-project
Reassociate x + -0.1234 * y into x - 0.1234 * y
This does not require -ffast-math, and it gives CSE/GVN more options to eliminate duplicate expressions in, e.g.: return ((x + 0.1234 * y) * (x - 0.1234 * y)); Differential Revision: http://reviews.llvm.org/D4904 llvm-svn: 216169
This commit is contained in:
parent
b791ef21d2
commit
2b98bd2a80
|
@ -93,44 +93,6 @@ This requires reassociating to forms of expressions that are already available,
|
|||
something that reassoc doesn't think about yet.
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
This function: (derived from GCC PR19988)
|
||||
double foo(double x, double y) {
|
||||
return ((x + 0.1234 * y) * (x + -0.1234 * y));
|
||||
}
|
||||
|
||||
compiles to:
|
||||
_foo:
|
||||
movapd %xmm1, %xmm2
|
||||
mulsd LCPI1_1(%rip), %xmm1
|
||||
mulsd LCPI1_0(%rip), %xmm2
|
||||
addsd %xmm0, %xmm1
|
||||
addsd %xmm0, %xmm2
|
||||
movapd %xmm1, %xmm0
|
||||
mulsd %xmm2, %xmm0
|
||||
ret
|
||||
|
||||
Reassociate should be able to turn it into:
|
||||
|
||||
double foo(double x, double y) {
|
||||
return ((x + 0.1234 * y) * (x - 0.1234 * y));
|
||||
}
|
||||
|
||||
Which allows the multiply by constant to be CSE'd, producing:
|
||||
|
||||
_foo:
|
||||
mulsd LCPI1_0(%rip), %xmm1
|
||||
movapd %xmm1, %xmm2
|
||||
addsd %xmm0, %xmm2
|
||||
subsd %xmm1, %xmm0
|
||||
mulsd %xmm2, %xmm0
|
||||
ret
|
||||
|
||||
This doesn't need -ffast-math support at all. This is particularly bad because
|
||||
the llvm-gcc frontend is canonicalizing the latter into the former, but clang
|
||||
doesn't have this problem.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
These two functions should generate the same code on big-endian systems:
|
||||
|
|
|
@ -193,6 +193,8 @@ namespace {
|
|||
Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
|
||||
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
|
||||
void EraseInst(Instruction *I);
|
||||
void optimizeFAddNegExpr(ConstantFP *ConstOperand, Instruction *I,
|
||||
int OperandNr);
|
||||
void OptimizeInst(Instruction *I);
|
||||
};
|
||||
}
|
||||
|
@ -1914,6 +1916,33 @@ void Reassociate::EraseInst(Instruction *I) {
|
|||
}
|
||||
}
|
||||
|
||||
/// optimizeFAddNegExpr - Make the negative constant operand of \p I positive
/// and compensate by flipping the opcode of I's single FAdd/FSub user:
///   x + (-C op y)  ->  x - (C op y)
///   x - (-C op y)  ->  x + (C op y)
/// where "op" is whatever \p I computes.
///
/// \param ConstOperand  The negative constant; its negation is written back
///                      as operand 0 of \p I.
/// \param I             The instruction holding the constant. It must have
///                      exactly one use.
/// \param OperandNr     Index (0 or 1) of \p I in its user's operand list.
///
/// The user itself is not erased here; all of its uses are redirected to the
/// newly created instruction, which is inserted directly before it.
void Reassociate::optimizeFAddNegExpr(ConstantFP *ConstOperand, Instruction *I,
                                      int OperandNr) {
  // Validate every precondition before touching the IR.
  assert(I->hasOneUse() && "Only a single use can be replaced.");
  assert((OperandNr == 0 || OperandNr == 1) &&
         "User operand index must be 0 or 1.");
  Instruction *Parent = I->user_back();

  unsigned Opcode = Parent->getOpcode();
  assert((Opcode == Instruction::FAdd ||
          (Opcode == Instruction::FSub && Parent->getOperand(1) == I)) &&
         "Expected an FAdd user, or an FSub user with I as the subtrahend.");

  // Change the sign of the constant.
  APFloat Val = ConstOperand->getValueAPF();
  Val.changeSign();
  I->setOperand(0, ConstantFP::get(ConstOperand->getContext(), Val));

  // The operand of the user that is not I.
  Value *OtherOperand = Parent->getOperand(1 - OperandNr);

  // FAdd becomes FSub and vice versa. I always ends up as the right-hand
  // operand: FAdd is commutative, and for FSub the assert above guarantees
  // that I already was the subtrahend.
  BinaryOperator *NI = Opcode == Instruction::FAdd
                           ? BinaryOperator::CreateFSub(OtherOperand, I)
                           : BinaryOperator::CreateFAdd(OtherOperand, I);
  NI->setFastMathFlags(cast<FPMathOperator>(Parent)->getFastMathFlags());
  NI->insertBefore(Parent);
  NI->setName(Parent->getName() + ".repl");
  Parent->replaceAllUsesWith(NI);
  // NI stands in for Parent, so it inherits Parent's source location rather
  // than the location of the multiply/divide feeding it.
  NI->setDebugLoc(Parent->getDebugLoc());
  MadeChange = true;
}
|
||||
|
||||
/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
|
||||
/// instructions is not allowed.
|
||||
void Reassociate::OptimizeInst(Instruction *I) {
|
||||
|
@ -1940,8 +1969,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
|
|||
if (I->getType()->isFloatingPointTy() || I->getType()->isVectorTy()) {
|
||||
|
||||
// FAdd and FMul can be commuted.
|
||||
if (I->getOpcode() == Instruction::FMul ||
|
||||
I->getOpcode() == Instruction::FAdd) {
|
||||
unsigned Opcode = I->getOpcode();
|
||||
if (Opcode == Instruction::FMul || Opcode == Instruction::FAdd) {
|
||||
Value *LHS = I->getOperand(0);
|
||||
Value *RHS = I->getOperand(1);
|
||||
unsigned LHSRank = getRank(LHS);
|
||||
|
@ -1954,6 +1983,24 @@ void Reassociate::OptimizeInst(Instruction *I) {
|
|||
}
|
||||
}
|
||||
|
||||
// Reassociate: x + -ConstantFP * y -> x - ConstantFP * y
|
||||
// The FMul can also be an FDiv, and FAdd can be a FSub.
|
||||
if (Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
|
||||
if (ConstantFP *LHSConst = dyn_cast<ConstantFP>(I->getOperand(0))) {
|
||||
if (LHSConst->isNegative() && I->hasOneUse()) {
|
||||
Instruction *Parent = I->user_back();
|
||||
if (Parent->getOpcode() == Instruction::FAdd) {
|
||||
if (Parent->getOperand(0) == I)
|
||||
optimizeFAddNegExpr(LHSConst, I, 0);
|
||||
else if (Parent->getOperand(1) == I)
|
||||
optimizeFAddNegExpr(LHSConst, I, 1);
|
||||
} else if (Parent->getOpcode() == Instruction::FSub)
|
||||
if (Parent->getOperand(1) == I)
|
||||
optimizeFAddNegExpr(LHSConst, I, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: We should commute vector instructions as well. However, this
|
||||
// requires further analysis to determine the effect on later passes.
|
||||
|
||||
|
|
|
@ -193,7 +193,7 @@ define float @test13(float %X1, float %X2, float %X3) {
|
|||
define float @test14(float %X1, float %X2) {
|
||||
; CHECK-LABEL: @test14
|
||||
; CHECK-NEXT: fsub fast float %X1, %X2
|
||||
; CHECK-NEXT: fmul fast float %tmp, 4.700000e+01
|
||||
; CHECK-NEXT: fmul fast float %1, 4.700000e+01
|
||||
; CHECK-NEXT: ret float
|
||||
|
||||
%B = fmul fast float %X1, 47. ; X1*47
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
; RUN: opt -reassociate -gvn -S < %s | FileCheck %s
|
||||
|
||||
; (x + 0.1234 * y) * (x + -0.1234 * y) -> (x + 0.1234 * y) * (x - 0.1234 * y)
|
||||
; so CSE can simplify it further
|
||||
define double @lift_sign1(double %x, double %y) nounwind readnone ssp uwtable {
; CHECK-LABEL: @lift_sign1(
  %mul = fmul double 1.234000e-01, %y
  %add = fadd double %mul, %x
  %mul1 = fmul double -1.234000e-01, %y
  %add2 = fadd double %mul1, %x
  %mul3 = fmul double %add, %add2
; The multiply by the negative constant and the fadd that consumed it must be
; gone; the replacement fsub is expected to reuse %mul, and %mul3 must consume
; the replacement.
; CHECK-NOT: %mul1 = fmul double -1.234000e-01, %y
; CHECK-NOT: %add2 = fadd double %mul1, %x
; CHECK: %add2.repl = fsub double %x, %mul
; CHECK: %mul3 = fmul double %add, %add2.repl
  ret double %mul3
}
|
||||
|
||||
; (x + -0.1234 * y) * (x + -0.1234 * y) -> (x - 0.1234 * y) * (x - 0.1234 * y)
|
||||
; GVN can then rewrite it even further
|
||||
define double @lift_sign2(double %x, double %y) nounwind readnone ssp uwtable {
; Both products multiply %y by the same negative constant and both are added
; to %x. The checks below expect the original five instructions to be gone,
; leaving one multiply by the positive constant, one fsub from %x, and %mul3
; using that fsub for both operands.
|
||||
; CHECK-LABEL: @lift_sign2(
|
||||
%mul = fmul double %y, -1.234000e-01
|
||||
%add = fadd double %mul, %x
|
||||
%mul1 = fmul double %y, -1.234000e-01
|
||||
%add2 = fadd double %mul1, %x
|
||||
%mul3 = fmul double %add, %add2
|
||||
; CHECK-NOT: %mul = fmul double %y, -1.234000e-01
|
||||
; CHECK-NOT: %add = fadd double %mul, %x
|
||||
; CHECK-NOT: %mul1 = fmul double %y, -1.234000e-01
|
||||
; CHECK-NOT: %add2 = fadd double %mul1, %x
|
||||
; CHECK-NOT: %mul3 = fmul double %add, %add2
|
||||
; CHECK: %mul = fmul double 1.234000e-01, %y
|
||||
; CHECK: %add.repl = fsub double %x, %mul
|
||||
; CHECK: %mul3 = fmul double %add.repl, %add.repl
|
||||
ret double %mul3
|
||||
}
|
||||
|
||||
; (x + 0.1234 * y) * (x - -0.1234 * y) -> (x + 0.1234 * y) * (x + 0.1234 * y)
|
||||
define double @lift_sign3(double %x, double %y) nounwind readnone ssp uwtable {
; Here the product with the negative constant is subtracted from %x (fsub)
; instead of added. The checks below expect %mul1 and %add2 to disappear and
; %mul3 to use %add for both operands.
|
||||
; CHECK-LABEL: @lift_sign3(
|
||||
%mul = fmul double %y, 1.234000e-01
|
||||
%add = fadd double %mul, %x
|
||||
%mul1 = fmul double %y, -1.234000e-01
|
||||
%add2 = fsub double %x, %mul1
|
||||
%mul3 = fmul double %add, %add2
|
||||
; CHECK-NOT: %mul1 = fmul double %y, -1.234000e-01
|
||||
; CHECK-NOT: %add2 = fsub double %x, %mul1
|
||||
; CHECK-NOT: %mul3 = fmul double %add, %add2
|
||||
; CHECK: %mul3 = fmul double %add, %add
|
||||
ret double %mul3
|
||||
}
|
||||
|
||||
; (x + 0.1234 / y) * (x + -0.1234 / y) -> (x + 0.1234 / y) * (x - 0.1234 / y)
|
||||
; so CSE can simplify it further
|
||||
define double @lift_sign4(double %x, double %y) nounwind readnone ssp uwtable {
; Division variant: the constant is the dividend of an fdiv rather than a
; factor of an fmul. The checks below expect the fdiv by the negative constant
; and its fadd to disappear, replaced by an fsub of %div from %x that %mul3
; then consumes.
|
||||
; CHECK-LABEL: @lift_sign4(
|
||||
%div = fdiv double 1.234000e-01, %y
|
||||
%add = fadd double %div, %x
|
||||
%div1 = fdiv double -1.234000e-01, %y
|
||||
%add2 = fadd double %div1, %x
|
||||
%mul3 = fmul double %add, %add2
|
||||
; CHECK-NOT: %div1 = fdiv double -1.234000e-01, %y
|
||||
; CHECK-NOT: %add2 = fadd double %div1, %x
|
||||
; CHECK-NOT: %mul3 = fmul double %add, %add2
|
||||
; CHECK: %add2.repl = fsub double %x, %div
|
||||
; CHECK: %mul3 = fmul double %add, %add2.repl
|
||||
ret double %mul3
|
||||
}
|
Loading…
Reference in New Issue