diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 158b2d0576b8..568d8c4c235a 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -186,6 +186,7 @@ namespace { (void) llvm::createScalarizerPass(); (void) llvm::createSeparateConstOffsetFromGEPPass(); (void) llvm::createSpeculativeExecutionPass(); + (void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass(); (void) llvm::createRewriteSymbolsPass(); (void) llvm::createStraightLineStrengthReducePass(); (void) llvm::createMemDerefPrinter(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 774d8261bca1..7ed88fbf43f2 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -430,6 +430,10 @@ createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr, // FunctionPass *createSpeculativeExecutionPass(); +// Same as createSpeculativeExecutionPass, but does nothing unless +// TargetTransformInfo::hasBranchDivergence() is true. +FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass(); + //===----------------------------------------------------------------------===// // // LoadCombine - Combine loads into bigger loads. diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index 147d615488ff..6b82943abf33 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -50,6 +50,15 @@ // aggressive speculation while counting on later passes to either capitalize on // that or clean it up. // +// If the pass was created by calling +// createSpeculativeExecutionIfHasBranchDivergencePass or the +// -spec-exec-only-if-divergent-target option is present, this pass only has an +// effect on targets where TargetTransformInfo::hasBranchDivergence() is true; +// on other targets, it is a nop. 
+// + // This lets you include this pass unconditionally in the IR pass pipeline, but + // only enable it for relevant targets. + // //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallSet.h" @@ -83,19 +92,39 @@ static cl::opt SpecExecMaxNotHoisted( "number of instructions that would not be speculatively executed " "exceeds this limit.")); +static cl::opt SpecExecOnlyIfDivergentTarget( + "spec-exec-only-if-divergent-target", cl::init(0), cl::Hidden, + cl::desc("Speculative execution is applied only to targets with divergent " + "branches, even if the pass was configured to apply only to all " + "targets.")); + namespace { + class SpeculativeExecution : public FunctionPass { public: - static char ID; - SpeculativeExecution(): FunctionPass(ID) {} + static char ID; + explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false) + : FunctionPass(ID), + OnlyIfDivergentTarget(OnlyIfDivergentTarget || + SpecExecOnlyIfDivergentTarget) {} - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { + if (OnlyIfDivergentTarget) + return "Speculatively execute instructions if target has divergent " + "branches"; + return "Speculatively execute instructions"; + } private: bool runOnBasicBlock(BasicBlock &B); bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock); + // If true, this pass is a nop unless the target architecture has branch + // divergence. 
+ const bool OnlyIfDivergentTarget; const TargetTransformInfo *TTI = nullptr; }; } // namespace @@ -105,7 +134,7 @@ INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution", "Speculatively execute instructions", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution", - "Speculatively execute instructions", false, false) + "Speculatively execute instructions", false, false) void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -116,6 +145,11 @@ bool SpeculativeExecution::runOnFunction(Function &F) { return false; TTI = &getAnalysis().getTTI(F); + if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) { + DEBUG(dbgs() << "Not running SpeculativeExecution because " + "TTI->hasBranchDivergence() is false.\n"); + return false; + } bool Changed = false; for (auto& B : F) { @@ -240,4 +274,8 @@ FunctionPass *createSpeculativeExecutionPass() { return new SpeculativeExecution(); } +FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() { + return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true); +} + } // namespace llvm diff --git a/llvm/test/Transforms/SpeculativeExecution/divergent-target.ll b/llvm/test/Transforms/SpeculativeExecution/divergent-target.ll new file mode 100644 index 000000000000..d3f2a3fa0d32 --- /dev/null +++ b/llvm/test/Transforms/SpeculativeExecution/divergent-target.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -speculative-execution | \ +; RUN: FileCheck --check-prefix=ON %s +; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -speculative-execution \ +; RUN: -spec-exec-only-if-divergent-target | \ +; RUN: FileCheck --check-prefix=ON %s +; RUN: opt < %s -S -march=x86_64 -speculative-execution \ +; RUN: -spec-exec-only-if-divergent-target | \ +; RUN: FileCheck --check-prefix=OFF %s + +; Hoist in if-then pattern. 
+define void @f() { +; ON: %x = add i32 2, 3 +; ON: br i1 true +; OFF: br i1 true +; OFF: %x = add i32 2, 3 + br i1 true, label %a, label %b +a: + %x = add i32 2, 3 + br label %b +b: + ret void +}