forked from OSchip/llvm-project
Add a new pass to simplify specific half_powr function calls. This is
a specialized pass that is not likely to be generally useful. llvm-svn: 58732
This commit is contained in:
parent
db9d785338
commit
8cdea717a3
|
@ -99,6 +99,7 @@ namespace {
|
|||
(void) llvm::createSCCPPass();
|
||||
(void) llvm::createScalarReplAggregatesPass();
|
||||
(void) llvm::createSimplifyLibCallsPass();
|
||||
(void) llvm::createSimplifyHalfPowrLibCallsPass();
|
||||
(void) llvm::createSingleLoopExtractorPass();
|
||||
(void) llvm::createStripSymbolsPass();
|
||||
(void) llvm::createStripDeadPrototypesPass();
|
||||
|
|
|
@ -317,6 +317,12 @@ Pass *createLoopDeletionPass();
|
|||
/// specific well-known (library) functions.
|
||||
FunctionPass *createSimplifyLibCallsPass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// createSimplifyHalfPowrLibCallsPass - This is an experimental pass that
|
||||
/// optimizes specific half_powr functions.
|
||||
FunctionPass *createSimplifyHalfPowrLibCallsPass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// CodeGenPrepare - This pass prepares a function for instruction selection.
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements a simple pass that applies an experimental
|
||||
// transformation on calls to specific functions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "simplify-libcalls-halfpowr"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Config/config.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
/// This pass optimizes well half_powr function calls.
|
||||
///
|
||||
class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass {
|
||||
const TargetData *TD;
|
||||
public:
|
||||
static char ID; // Pass identification
|
||||
SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
|
||||
|
||||
bool runOnFunction(Function &F);
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<TargetData>();
|
||||
}
|
||||
|
||||
Instruction *
|
||||
InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
|
||||
Instruction *InsertPt);
|
||||
};
|
||||
char SimplifyHalfPowrLibCalls::ID = 0;
|
||||
} // end anonymous namespace.
|
||||
|
||||
// Register the pass under the command-line name "simplify-libcalls-halfpowr"
// with a short human-readable description.
static RegisterPass<SimplifyHalfPowrLibCalls> X(
    "simplify-libcalls-halfpowr",
    "Simplify half_powr library calls");
|
||||
|
||||
// Public interface to the Simplify HalfPowr LibCalls pass.
|
||||
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
|
||||
return new SimplifyHalfPowrLibCalls();
|
||||
}
|
||||
|
||||
/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
|
||||
/// their control flow to better facilitate subsequent optimization.
|
||||
Instruction *
|
||||
SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
|
||||
Instruction *InsertPt) {
|
||||
std::vector<BasicBlock *> Bodies;
|
||||
BasicBlock *NewBlock = 0;
|
||||
|
||||
for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
|
||||
CallInst *Call = cast<CallInst>(HalfPowrs[i]);
|
||||
Function *Callee = Call->getCalledFunction();
|
||||
|
||||
// Minimally sanity-check the CFG of half_powr to ensure that it contains
|
||||
// the the kind of code we expect. If we're running this pass, we have
|
||||
// reason to believe it will be what we expect.
|
||||
Function::iterator I = Callee->begin();
|
||||
BasicBlock *Prologue = I++;
|
||||
if (I == Callee->end()) break;
|
||||
BasicBlock *SubnormalHandling = I++;
|
||||
if (I == Callee->end()) break;
|
||||
BasicBlock *Body = I++;
|
||||
if (I != Callee->end()) break;
|
||||
if (SubnormalHandling->getSinglePredecessor() != Prologue)
|
||||
break;
|
||||
BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
|
||||
if (!PBI || !PBI->isConditional())
|
||||
break;
|
||||
BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
|
||||
if (!SNBI || SNBI->isConditional())
|
||||
break;
|
||||
if (!isa<ReturnInst>(Body->getTerminator()))
|
||||
break;
|
||||
|
||||
Instruction *NextInst = next(BasicBlock::iterator(Call));
|
||||
|
||||
// Inline the call, taking care of what code ends up where.
|
||||
NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
|
||||
|
||||
bool B = InlineFunction(Call, 0, TD);
|
||||
assert(B && "half_powr didn't inline?");
|
||||
|
||||
BasicBlock *NewBody = NewBlock->getSinglePredecessor();
|
||||
assert(NewBody);
|
||||
Bodies.push_back(NewBody);
|
||||
}
|
||||
|
||||
if (!NewBlock)
|
||||
return InsertPt;
|
||||
|
||||
// Put the code for all the bodies into one block, to facilitate
|
||||
// subsequent optimization.
|
||||
(void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
|
||||
for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
|
||||
BasicBlock *Body = Bodies[i];
|
||||
Instruction *FNP = Body->getFirstNonPHI();
|
||||
// Splice the insts from body into NewBlock.
|
||||
NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
|
||||
FNP, Body->getTerminator());
|
||||
}
|
||||
|
||||
return NewBlock->begin();
|
||||
}
|
||||
|
||||
/// runOnFunction - Top level algorithm.
|
||||
///
|
||||
bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
|
||||
TD = &getAnalysis<TargetData>();
|
||||
|
||||
bool Changed = false;
|
||||
std::vector<Instruction *> HalfPowrs;
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
// Look for calls.
|
||||
bool IsHalfPowr = false;
|
||||
if (CallInst *CI = dyn_cast<CallInst>(I)) {
|
||||
// Look for direct calls and calls to non-external functions.
|
||||
Function *Callee = CI->getCalledFunction();
|
||||
if (Callee && Callee->hasExternalLinkage()) {
|
||||
// Look for calls with well-known names.
|
||||
const char *CalleeName = Callee->getNameStart();
|
||||
if (strcmp(CalleeName, "__half_powrf4") == 0)
|
||||
IsHalfPowr = true;
|
||||
}
|
||||
}
|
||||
if (IsHalfPowr)
|
||||
HalfPowrs.push_back(I);
|
||||
// We're looking for sequences of up to three such calls, which we'll
|
||||
// simplify as a group.
|
||||
if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
|
||||
I = InlineHalfPowrs(HalfPowrs, I);
|
||||
E = I->getParent()->end();
|
||||
HalfPowrs.clear();
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
assert(HalfPowrs.empty() && "Block had no terminator!");
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
; RUN: llvm-as < %s | opt -simplify-libcalls-halfpowr | llvm-dis | %prcontext {mul float} 1 | grep {mul float} | count 8
|
||||
|
||||
; Stand-in definition of __half_powrf4 for this test.  It has the three-block
; layout the pass checks for: %entry ends in a conditional branch, %bb is
; reached only from %entry and ends in an unconditional branch, and %bb1
; returns.
define float @__half_powrf4(float %f, float %g) nounwind readnone {
|
||||
entry:
|
||||
%0 = fcmp olt float %f, 2.000000e+00 ; <i1> [#uses=1]
|
||||
br i1 %0, label %bb, label %bb1
|
||||
|
||||
bb: ; preds = %entry
|
||||
%1 = fdiv float %f, 3.000000e+00 ; <float> [#uses=1]
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb, %entry
|
||||
%f_addr.0 = phi float [ %1, %bb ], [ %f, %entry ] ; <float> [#uses=1]
|
||||
%2 = mul float %f_addr.0, %g ; <float> [#uses=1]
|
||||
ret float %2
|
||||
}
|
||||
|
||||
; Caller under test: loads six floats from %p, makes three back-to-back calls
; to @__half_powrf4 (the largest group the pass handles at once), and stores
; the three results to %p[6], %p[7] and %p[8].
define void @foo(float* %p) nounwind {
|
||||
entry:
|
||||
%0 = load float* %p, align 4 ; <float> [#uses=1]
|
||||
%1 = getelementptr float* %p, i32 1 ; <float*> [#uses=1]
|
||||
%2 = load float* %1, align 4 ; <float> [#uses=1]
|
||||
%3 = getelementptr float* %p, i32 2 ; <float*> [#uses=1]
|
||||
%4 = load float* %3, align 4 ; <float> [#uses=1]
|
||||
%5 = getelementptr float* %p, i32 3 ; <float*> [#uses=1]
|
||||
%6 = load float* %5, align 4 ; <float> [#uses=1]
|
||||
%7 = getelementptr float* %p, i32 4 ; <float*> [#uses=1]
|
||||
%8 = load float* %7, align 4 ; <float> [#uses=1]
|
||||
%9 = getelementptr float* %p, i32 5 ; <float*> [#uses=1]
|
||||
%10 = load float* %9, align 4 ; <float> [#uses=1]
|
||||
; The three adjacent calls that the pass is expected to inline as one group.
%11 = tail call float @__half_powrf4(float %0, float %6) nounwind ; <float> [#uses=1]
|
||||
%12 = tail call float @__half_powrf4(float %2, float %8) nounwind ; <float> [#uses=1]
|
||||
%13 = tail call float @__half_powrf4(float %4, float %10) nounwind ; <float> [#uses=1]
|
||||
%14 = getelementptr float* %p, i32 6 ; <float*> [#uses=1]
|
||||
store float %11, float* %14, align 4
|
||||
%15 = getelementptr float* %p, i32 7 ; <float*> [#uses=1]
|
||||
store float %12, float* %15, align 4
|
||||
%16 = getelementptr float* %p, i32 8 ; <float*> [#uses=1]
|
||||
store float %13, float* %16, align 4
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue