forked from OSchip/llvm-project
Loop fusion command line options cleanup
- clean up loop fusion CL options for promoting local buffers to fast memory space
- add parameters to loop fusion pass instantiation
PiperOrigin-RevId: 235813419
This commit is contained in:
parent
2d4b0e2c00
commit
d4b3ff1096
|
@ -72,8 +72,11 @@ FunctionPass *createLoopUnrollAndJamPass(int unrollJamFactor = -1);
|
||||||
/// Creates a simplification pass for affine structures.
|
/// Creates a simplification pass for affine structures.
|
||||||
FunctionPass *createSimplifyAffineStructuresPass();
|
FunctionPass *createSimplifyAffineStructuresPass();
|
||||||
|
|
||||||
/// Creates a loop fusion pass which fuses loops.
|
/// Creates a loop fusion pass which fuses loops. Buffers of size less than or
|
||||||
FunctionPass *createLoopFusionPass();
|
/// equal to `localBufSizeThreshold` are promoted to memory space
|
||||||
|
/// `fastMemorySpace`.
|
||||||
|
FunctionPass *createLoopFusionPass(unsigned fastMemorySpace = 0,
|
||||||
|
uint64_t localBufSizeThreshold = 0);
|
||||||
|
|
||||||
/// Creates a pass to pipeline explicit movement of data across levels of the
|
/// Creates a pass to pipeline explicit movement of data across levels of the
|
||||||
/// memory hierarchy.
|
/// memory hierarchy.
|
||||||
|
|
|
@ -67,9 +67,11 @@ static llvm::cl::opt<unsigned> clFusionFastMemorySpace(
|
||||||
llvm::cl::desc("Faster memory space number to promote fusion buffers to"),
|
llvm::cl::desc("Faster memory space number to promote fusion buffers to"),
|
||||||
llvm::cl::cat(clOptionsCategory));
|
llvm::cl::cat(clOptionsCategory));
|
||||||
|
|
||||||
static llvm::cl::opt<unsigned> clFusionLocalBufThreshold(
|
// A local buffer of size less than or equal to this size is promoted to fast
|
||||||
|
// memory.
|
||||||
|
static llvm::cl::opt<unsigned long long> clFusionLocalBufThreshold(
|
||||||
"fusion-local-buf-threshold", llvm::cl::Hidden,
|
"fusion-local-buf-threshold", llvm::cl::Hidden,
|
||||||
llvm::cl::desc("Threshold size (bytes) for promoting local buffers to fast "
|
llvm::cl::desc("Threshold size (KiB) for promoting local buffers to fast "
|
||||||
"memory space"),
|
"memory space"),
|
||||||
llvm::cl::cat(clOptionsCategory));
|
llvm::cl::cat(clOptionsCategory));
|
||||||
|
|
||||||
|
@ -85,14 +87,17 @@ namespace {
|
||||||
// and add support for more general loop fusion algorithms.
|
// and add support for more general loop fusion algorithms.
|
||||||
|
|
||||||
struct LoopFusion : public FunctionPass {
|
struct LoopFusion : public FunctionPass {
|
||||||
LoopFusion() : FunctionPass(&LoopFusion::passID) {}
|
LoopFusion(unsigned fastMemorySpace = 0, uint64_t localBufSizeThreshold = 0)
|
||||||
|
: FunctionPass(&LoopFusion::passID),
|
||||||
|
localBufSizeThreshold(localBufSizeThreshold),
|
||||||
|
fastMemorySpace(fastMemorySpace) {}
|
||||||
|
|
||||||
PassResult runOnFunction(Function *f) override;
|
PassResult runOnFunction(Function *f) override;
|
||||||
constexpr static PassID passID = {};
|
constexpr static PassID passID = {};
|
||||||
|
|
||||||
// Any local buffers smaller than this size will be created in
|
// Any local buffers of size (in bytes) less than or equal to this will be created in
|
||||||
// `fastMemorySpace` if provided.
|
// `fastMemorySpace` if provided.
|
||||||
unsigned localBufSizeThreshold = 1024;
|
uint64_t localBufSizeThreshold;
|
||||||
Optional<unsigned> fastMemorySpace = None;
|
Optional<unsigned> fastMemorySpace = None;
|
||||||
|
|
||||||
// The amount of additional computation that is tolerated while fusing
|
// The amount of additional computation that is tolerated while fusing
|
||||||
|
@ -102,7 +107,10 @@ struct LoopFusion : public FunctionPass {
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
FunctionPass *mlir::createLoopFusionPass() { return new LoopFusion; }
|
FunctionPass *mlir::createLoopFusionPass(unsigned fastMemorySpace,
|
||||||
|
uint64_t localBufSizeThreshold) {
|
||||||
|
return new LoopFusion(fastMemorySpace, localBufSizeThreshold);
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
@ -632,7 +640,7 @@ struct LoopNestStatsCollector {
|
||||||
unsigned count = 0;
|
unsigned count = 0;
|
||||||
stats->opCountMap[forInst] = 0;
|
stats->opCountMap[forInst] = 0;
|
||||||
for (auto &inst : *forOp->getBody()) {
|
for (auto &inst : *forOp->getBody()) {
|
||||||
if (!(inst.isa<AffineForOp>() || inst.isa<AffineIfOp>()))
|
if (!inst.isa<AffineForOp>() && !inst.isa<AffineIfOp>())
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
stats->opCountMap[forInst] = count;
|
stats->opCountMap[forInst] = count;
|
||||||
|
@ -1048,7 +1056,7 @@ static Value *createPrivateMemRef(OpPointer<AffineForOp> forOp,
|
||||||
Instruction *srcStoreOpInst,
|
Instruction *srcStoreOpInst,
|
||||||
unsigned dstLoopDepth,
|
unsigned dstLoopDepth,
|
||||||
Optional<unsigned> fastMemorySpace,
|
Optional<unsigned> fastMemorySpace,
|
||||||
unsigned localBufSizeThreshold) {
|
uint64_t localBufSizeThreshold) {
|
||||||
auto *forInst = forOp->getInstruction();
|
auto *forInst = forOp->getInstruction();
|
||||||
|
|
||||||
// Create builder to insert alloc op just before 'forOp'.
|
// Create builder to insert alloc op just before 'forOp'.
|
||||||
|
@ -1102,7 +1110,7 @@ static Value *createPrivateMemRef(OpPointer<AffineForOp> forOp,
|
||||||
uint64_t bufSize =
|
uint64_t bufSize =
|
||||||
getMemRefEltSizeInBytes(oldMemRefType) * numElements.getValue();
|
getMemRefEltSizeInBytes(oldMemRefType) * numElements.getValue();
|
||||||
unsigned newMemSpace;
|
unsigned newMemSpace;
|
||||||
if (bufSize < localBufSizeThreshold && fastMemorySpace.hasValue()) {
|
if (bufSize <= localBufSizeThreshold && fastMemorySpace.hasValue()) {
|
||||||
newMemSpace = fastMemorySpace.getValue();
|
newMemSpace = fastMemorySpace.getValue();
|
||||||
} else {
|
} else {
|
||||||
newMemSpace = oldMemRefType.getMemorySpace();
|
newMemSpace = oldMemRefType.getMemorySpace();
|
||||||
|
@ -1414,7 +1422,8 @@ static bool isFusionProfitable(Instruction *srcOpInst,
|
||||||
LLVM_DEBUG({
|
LLVM_DEBUG({
|
||||||
std::stringstream msg;
|
std::stringstream msg;
|
||||||
msg << " evaluating fusion profitability at depth : " << i << "\n"
|
msg << " evaluating fusion profitability at depth : " << i << "\n"
|
||||||
<< std::setprecision(2) << " additional compute fraction: "
|
<< std::fixed << std::setprecision(2)
|
||||||
|
<< " additional compute fraction: "
|
||||||
<< 100.0 * additionalComputeFraction << "%\n"
|
<< 100.0 * additionalComputeFraction << "%\n"
|
||||||
<< " storage reduction factor: " << storageReduction << "x\n"
|
<< " storage reduction factor: " << storageReduction << "x\n"
|
||||||
<< " fused nest cost: " << fusedLoopNestComputeCost << "\n"
|
<< " fused nest cost: " << fusedLoopNestComputeCost << "\n"
|
||||||
|
@ -1795,10 +1804,16 @@ public:
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
PassResult LoopFusion::runOnFunction(Function *f) {
|
PassResult LoopFusion::runOnFunction(Function *f) {
|
||||||
|
// Override if a command line argument was provided.
|
||||||
if (clFusionFastMemorySpace.getNumOccurrences() > 0) {
|
if (clFusionFastMemorySpace.getNumOccurrences() > 0) {
|
||||||
fastMemorySpace = clFusionFastMemorySpace.getValue();
|
fastMemorySpace = clFusionFastMemorySpace.getValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Override if a command line argument was provided.
|
||||||
|
if (clFusionLocalBufThreshold.getNumOccurrences() > 0) {
|
||||||
|
localBufSizeThreshold = clFusionLocalBufThreshold * 1024;
|
||||||
|
}
|
||||||
|
|
||||||
MemRefDependenceGraph g;
|
MemRefDependenceGraph g;
|
||||||
if (g.init(f))
|
if (g.init(f))
|
||||||
GreedyFusion(&g).run(localBufSizeThreshold, fastMemorySpace);
|
GreedyFusion(&g).run(localBufSizeThreshold, fastMemorySpace);
|
||||||
|
|
Loading…
Reference in New Issue