forked from OSchip/llvm-project
[SelectOpti][2/5] Select-to-branch base transformation
This patch implements the actual transformation of selects to branches. It includes only the base transformation, without any sinking. Depends on D120230. Reviewed By: davidxl. Differential Revision: https://reviews.llvm.org/D122259
This commit is contained in:
parent
12bae5f3e2
commit
97c3ef5c8a
|
@ -10,16 +10,40 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "select-optimize"
|
||||
|
||||
STATISTIC(NumSelectsConverted, "Number of selects converted");
|
||||
|
||||
namespace {
|
||||
|
||||
class SelectOptimize : public FunctionPass {
|
||||
const TargetMachine *TM = nullptr;
|
||||
const TargetSubtargetInfo *TSI;
|
||||
const TargetLowering *TLI = nullptr;
|
||||
const LoopInfo *LI;
|
||||
std::unique_ptr<BlockFrequencyInfo> BFI;
|
||||
std::unique_ptr<BranchProbabilityInfo> BPI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
SelectOptimize() : FunctionPass(ID) {
|
||||
|
@ -28,16 +52,218 @@ public:
|
|||
|
||||
bool runOnFunction(Function &F) override;
|
||||
|
||||
// Declares the analyses this pass requires: the target pass configuration
// and loop information. Only this override is kept; the earlier empty stub
// would be a duplicate definition of the same member.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<LoopInfoWrapperPass>();
}
|
||||
|
||||
private:
|
||||
// Select groups consist of consecutive select instructions with the same
|
||||
// condition.
|
||||
using SelectGroup = SmallVector<SelectInst *, 2>;
|
||||
using SelectGroups = SmallVector<SelectGroup, 2>;
|
||||
|
||||
bool optimizeSelects(Function &F);
|
||||
void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
|
||||
void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
|
||||
bool isSelectKindSupported(SelectInst *SI);
|
||||
};
|
||||
} // namespace
|
||||
|
||||
char SelectOptimize::ID = 0;
|
||||
// Register the pass exactly once, using the BEGIN/END form so that the
// analyses it depends on (loop info and the target pass configuration) are
// initialized before it.
INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
                    false)
|
||||
|
||||
FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
|
||||
|
||||
/// Pass entry point. Caches the target hooks and analyses needed for \p F and
/// then runs the select optimization over the function.
///
/// \returns the result of optimizeSelects(F).
bool SelectOptimize::runOnFunction(Function &F) {
  // Target information comes from the pass configuration of the current
  // codegen pipeline.
  TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

  // Branch probabilities and block frequencies are computed locally from the
  // loop information rather than requested as separate analyses.
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  BPI.reset(new BranchProbabilityInfo(F, *LI));
  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));

  return optimizeSelects(F);
}
|
||||
|
||||
/// Collects the select groups of \p F and converts them to branches.
///
/// \returns true if at least one select group was handed to the conversion.
bool SelectOptimize::optimizeSelects(Function &F) {
  // Gather the select groups of every basic block in the function.
  SelectGroups SIGroups;
  for (BasicBlock &BB : F)
    collectSelectGroups(BB, SIGroups);

  // No profitability analysis yet: every collected group is treated as
  // profitable to convert.
  SelectGroups ProfSIGroups;
  ProfSIGroups.append(SIGroups.begin(), SIGroups.end());

  // Rewrite the chosen groups as explicit control flow.
  convertProfitableSIGroups(ProfSIGroups);

  // The IR changed iff there was something to convert.
  return !ProfSIGroups.empty();
}
|
||||
|
||||
/// If \p isTrue is true, return the true value of \p SI, otherwise return
|
||||
/// false value of \p SI. If the true/false value of \p SI is defined by any
|
||||
/// select instructions in \p Selects, look through the defining select
|
||||
/// instruction until the true/false value is not defined in \p Selects.
|
||||
static Value *
|
||||
getTrueOrFalseValue(SelectInst *SI, bool isTrue,
|
||||
const SmallPtrSet<const Instruction *, 2> &Selects) {
|
||||
Value *V = nullptr;
|
||||
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
|
||||
DefSI = dyn_cast<SelectInst>(V)) {
|
||||
assert(DefSI->getCondition() == SI->getCondition() &&
|
||||
"The condition of DefSI does not match with SI");
|
||||
V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
|
||||
}
|
||||
assert(V && "Failed to get select true/false value");
|
||||
return V;
|
||||
}
|
||||
|
||||
/// Rewrites every select group in \p ProfSIGroups as a conditional branch
/// plus PHI nodes.
///
/// For each group, the containing block is split after the last select, an
/// empty "select.false" block is created, the (frozen) condition of the first
/// select drives a conditional branch, and each select is replaced by a PHI
/// in the "select.end" block. NumSelectsConverted is bumped per select.
void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
  for (SelectGroup &ASI : ProfSIGroups) {
    // TODO: eliminate the redundancy of logic transforming selects to branches
    // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
    // selects for all cases (with and without profile information).

    // Transform a sequence like this:
    //    start:
    //       %cmp = cmp uge i32 %a, %b
    //       %sel = select i1 %cmp, i32 %c, i32 %d
    //
    // Into:
    //    start:
    //       %cmp = cmp uge i32 %a, %b
    //       %cmp.frozen = freeze %cmp
    //       br i1 %cmp.frozen, label %select.end, label %select.false
    //    select.false:
    //       br label %select.end
    //    select.end:
    //       %sel = phi i32 [ %c, %start ], [ %d, %select.false ]
    //
    // %cmp should be frozen, otherwise it may introduce undefined behavior.

    // We split the block containing the select(s) into two blocks.
    SelectInst *SI = ASI.front();
    SelectInst *LastSI = ASI.back();
    BasicBlock *StartBlock = SI->getParent();
    BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
    BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
    // Delete the unconditional branch that was just created by the split.
    StartBlock->getTerminator()->eraseFromParent();

    // Move any debug/pseudo instructions that were in-between the select
    // group to the newly-created end block.
    SmallVector<Instruction *, 2> DebugPseudoINS;
    for (auto DIt = SI->getIterator(); &*DIt != LastSI; ++DIt) {
      if (DIt->isDebugOrPseudoInst())
        DebugPseudoINS.push_back(&*DIt);
    }
    // Resolve the insertion point once. Re-querying it after every move would
    // return the instruction that was just moved, so each subsequent
    // instruction would land in front of it and the original relative order
    // of the debug/pseudo instructions would be reversed.
    Instruction *DebugInsertPt = &*EndBlock->getFirstInsertionPt();
    for (Instruction *DI : DebugPseudoINS)
      DI->moveBefore(DebugInsertPt);

    // These are the new basic blocks for the conditional branch.
    // For now, no instruction sinking to the true/false blocks.
    // Thus both True and False blocks will be empty.
    BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;

    // Use the 'false' side for a new input value to the PHI.
    FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
                                    EndBlock->getParent(), EndBlock);
    auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
    FalseBranch->setDebugLoc(SI->getDebugLoc());

    // For the 'true' side the path originates from the start block from the
    // point view of the new PHI.
    TrueBlock = StartBlock;

    // Insert the real conditional branch based on the original condition.
    // The true edge goes straight to the end block; the false edge goes
    // through the (empty) false block. Passing SI as the last argument of
    // CreateCondBr makes it the metadata source for the new branch.
    BasicBlock *TT = EndBlock;
    BasicBlock *FT = FalseBlock;
    IRBuilder<> IB(SI);
    auto *CondFr =
        IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
    IB.CreateCondBr(CondFr, TT, FT, SI);

    SmallPtrSet<const Instruction *, 2> INS;
    INS.insert(ASI.begin(), ASI.end());
    // Use reverse iterator because later select may use the value of the
    // earlier select, and we need to propagate value through earlier select
    // to get the PHI operand.
    for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
      SelectInst *GroupSI = *It;
      // The select itself is replaced with a PHI Node.
      PHINode *PN =
          PHINode::Create(GroupSI->getType(), 2, "", &EndBlock->front());
      PN->takeName(GroupSI);
      PN->addIncoming(getTrueOrFalseValue(GroupSI, true, INS), TrueBlock);
      PN->addIncoming(getTrueOrFalseValue(GroupSI, false, INS), FalseBlock);
      PN->setDebugLoc(GroupSI->getDebugLoc());

      GroupSI->replaceAllUsesWith(PN);
      GroupSI->eraseFromParent();
      // Only the pointer value is used here, to drop the erased select from
      // the set consulted by getTrueOrFalseValue.
      INS.erase(GroupSI);
      ++NumSelectsConverted;
    }
  }
}
|
||||
|
||||
/// Scans \p BB and appends to \p SIGroups every group of consecutive selects
/// that share the condition of the group's first select. Debug/pseudo
/// instructions between the selects do not break a group. Groups whose first
/// select is of an unsupported kind are dropped.
void SelectOptimize::collectSelectGroups(BasicBlock &BB,
                                         SelectGroups &SIGroups) {
  for (BasicBlock::iterator Cur = BB.begin(), End = BB.end(); Cur != End;) {
    auto *Leader = dyn_cast<SelectInst>(&*Cur);
    ++Cur;
    if (!Leader)
      continue;

    // Start a new group and absorb the selects that follow the leader.
    SelectGroup Group;
    Group.push_back(Leader);
    for (; Cur != End; ++Cur) {
      Instruction *Next = &*Cur;
      auto *Follower = dyn_cast<SelectInst>(Next);
      if (Follower && Leader->getCondition() == Follower->getCondition()) {
        Group.push_back(Follower);
        continue;
      }
      // Debug/pseudo instructions are stepped over; anything else ends the
      // group and is re-examined by the outer loop.
      if (!Next->isDebugOrPseudoInst())
        break;
    }

    // If the select type is not supported, no point optimizing it.
    // Instruction selection will take care of it.
    if (isSelectKindSupported(Leader))
      SIGroups.push_back(Group);
  }
}
|
||||
|
||||
/// Returns true if the target reports support for the kind of select that
/// \p SI is. Selects whose condition is not a scalar i1 are always rejected.
bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
  // Anything other than an i1 condition is not handled.
  if (!SI->getCondition()->getType()->isIntegerTy(1))
    return false;

  // Scalar condition: classify by whether the selected value is a vector.
  const TargetLowering::SelectSupportKind Kind =
      SI->getType()->isVectorTy() ? TargetLowering::ScalarCondVectorVal
                                  : TargetLowering::ScalarValSelect;
  return TLI->isSelectSupported(Kind);
}
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s
|
||||
|
||||
; Single select converted to branch
|
||||
define i32 @single_select(i32 %a, i32 %b, i1 %cmp) {
|
||||
; CHECK-LABEL: @single_select(
|
||||
; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
|
||||
; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2:![0-9]+]]
|
||||
; CHECK: select.false:
|
||||
; CHECK-NEXT: br label [[SELECT_END]]
|
||||
; CHECK: select.end:
|
||||
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
|
||||
; CHECK-NEXT: ret i32 [[SEL]]
|
||||
;
|
||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !0
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; Select group converted to branch
|
||||
define i32 @select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
|
||||
; CHECK-LABEL: @select_group(
|
||||
; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
|
||||
; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]]
|
||||
; CHECK: select.false:
|
||||
; CHECK-NEXT: br label [[SELECT_END]]
|
||||
; CHECK: select.end:
|
||||
; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
|
||||
; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[A]], [[SELECT_FALSE]] ]
|
||||
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]]
|
||||
; CHECK-NEXT: ret i32 [[ADD]]
|
||||
;
|
||||
%sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0
|
||||
call void @llvm.dbg.value(metadata i32 %sel1, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !3)
|
||||
%sel2 = select i1 %cmp, i32 %c, i32 %a, !prof !0
|
||||
%add = add i32 %sel1, %sel2
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Select group with intra-group dependence converted to branch
|
||||
define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
|
||||
; CHECK-LABEL: @select_group_intra_group(
|
||||
; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
|
||||
; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF2]]
|
||||
; CHECK: select.false:
|
||||
; CHECK-NEXT: br label [[SELECT_END]]
|
||||
; CHECK: select.end:
|
||||
; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
|
||||
; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ]
|
||||
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]]
|
||||
; CHECK-NEXT: ret i32 [[SUB]]
|
||||
;
|
||||
%sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !0
|
||||
%sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !0
|
||||
%sub = sub i32 %sel1, %sel2
|
||||
ret i32 %sub
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable willreturn
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata)
|
||||
|
||||
!llvm.module.flags = !{!6, !7}
|
||||
|
||||
!0 = !{!"branch_weights", i32 1, i32 100}
|
||||
!1 = !DIFile(filename: "test.c", directory: "/test")
|
||||
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !5, splitDebugInlining: false, nameTableKind: None)
|
||||
!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !2)
|
||||
!4 = !DILocalVariable(name: "x", scope: !3)
|
||||
!5 = !{}
|
||||
!6 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!7 = !{i32 1, !"Debug Info Version", i32 3}
|
Loading…
Reference in New Issue