//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed on the stack -- do not
        // mess with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

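// Illustrative note (not part of the original source; assumes x86-64): the
// callee-saved pattern the two analyses above and below are meant to match
// looks like
//   push %rbx        # simple store from a reg at a negative stack offset
//   ...              # no prior local def of %rbx, no aliasing loads/stores
//   pop  %rbx        # matching restore at the same offset
// Any deviation (non-simple access, positive offset, multiple saves, or a
// mismatching restore offset) blacklists the register instead.
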
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between saves and restores don't agree with each
        // other, we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

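// A worked example of the half-open overlap test above (illustrative, not
// from the original source): region A = [-16, -8) (Offset = -16, Size = 8)
// and region B = [-12, -4) overlap because -16 + 8 > -12 and -16 < -12 + 8.
// An exactly identical region (same Offset and Size) is deliberately not
// treated as conflicting with itself in blacklistAllInConflictWith().
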
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

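// Illustrative example (assumption, x86-64; not from the original source):
// for a canonical prologue "mov %rsp, %rbp", evaluateStackOffsetExpr yields
// Output == SPVal and nothing is blacklisted. An unusual initialization such
// as "lea 16(%rsp), %rbp" produces Output != SPVal, and blacklistRegion(0, 0)
// conservatively poisons the whole frame for this function.
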
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
  unsigned NumDefs = InstInfo.getNumDefs();
  bool UsesFP = false;
  for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
       ++I) {
    MCOperand &Operand = Point.getOperand(I);
    if (!Operand.isReg())
      continue;
    if (Operand.getReg() == BC.MIB->getFramePointer()) {
      UsesFP = true;
      break;
    }
  }
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

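// Illustrative example (assumption, x86-64; not from the original source):
// an epilogue instruction such as "lea -8(%rbp), %rsp" restores SP from FP.
// Here Output lands strictly above SPVal, so the instruction is annotated
// with a slot tag; performChanges() can then adjust its immediate (via
// MIB->addToImm) if the slots it implicitly frees are moved.
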
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed on the stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        LLVM_FALLTHROUGH;
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

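// Note on the initial values in classifyCFIs() (an explanatory assumption,
// not documented in the original): CfaOffset = -8 and CfaReg = 7 encode the
// x86-64 SysV entry state, where DWARF register 7 is RSP and the CFA is
// RSP + 8 (accounting for the pushed return address); offsets are kept
// negated to match BOLT's SP-relative slot convention.
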
void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

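// A small worked example of the bookkeeping above (illustrative, not from
// the original source): deleting "push %rbx" whose slot is RegionAddr = -16
// (RegionSz = 8) leaves an 8-byte hole to collapse. Accesses hitting -16
// itself are tagged "AccessesDeletedPos" and retargeted later by
// setOffsetForCollapsedAccesses(); other affected slots get an
// AdjustLoadStoreOffset item whose sign is fixed up per base register in
// performChanges().
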
void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in
  // collapseRegion. This is the value of SP before the allocation the user
  // wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

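// Note the sign flip relative to collapseRegion() (illustrative reading, not
// from the original source): inserting RegionSz bytes schedules -RegionSz
// adjustments, since growing the frame pushes SP-relative slots further
// away. For example, reserving 8 bytes at the save point would turn a later
// "mov 0x8(%rsp), %rax" into "mov 0x10(%rsp), %rax" once performChanges()
// applies the scheduled items.
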
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        Success = BC.MIB->createRestoreFromStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        Success = BC.MIB->createSaveToStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
      assert(Success);
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;

void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}

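// Illustrative summary of the criterion above (an interpretation, not from
// the original source): a block B is a candidate save position for reg R
// when B is outside any loop, the stack state at B is known, and B's first
// instruction dominates every recorded use of R. Candidates are sorted by
// descending execution count, so the coldest one sits at the back of the
// vector and is tried first (and popped on failure in moveSaveRestores()).
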
void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    Order.push_back(I);

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count: " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}

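// Worked example of the threshold test in isBestSavePosCold() (illustrative
// numbers, not from the original source): with the default
// -shrink-wrapping-threshold=30, a save currently executed 1000 times may
// only move to a block executed fewer than 0.30 * 1000 = 300 times, making
// TotalEstimatedWin at least 700 dynamic executions.
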
/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating from blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // on edges transitioning to the dominance frontier; otherwise we would
  // pull these restores inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which
    // indicates the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}

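// Illustrative sketch of the placement rule above (an interpretation, not
// from the original source): restores belong on the dominance frontier of
// the chosen save point. When a frontier block also has successors reachable
// without passing through the save (a critical edge), splitFrontierCritEdges
// inserts a stub block on that edge so the restore executes only on paths
// that actually performed the save.
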
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
      if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(*RIt);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &*RIt;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
        MCInst &Inst = *I;
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

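// Design note summarizing the logic above: push/pop mode is all-or-nothing
// per function. If any CSR fails validatePushPopsMode() or
// fixPopsPlacements(), every scheduled InsertPushOrPop item is downgraded to
// InsertLoadOrStore and Erase items on non-CFI instructions become
// ChangeToAdjustment, leaving the frame layout untouched.
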
namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().startswith(".LSplitEdge"))
      continue;
    for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
      BinaryBasicBlock &RBB = *RIter;
      if (&RBB == &BB)
        break;
      if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A specialized StackPointerTracking that accounts for instructions we plan
// to remove or insert later.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  Optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

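// Minimal usage sketch (illustrative, mirroring how the pass is expected to
// drive this class; not a verbatim excerpt):
//   PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
//   PSPT.run();
//   // Query the predicted SP/FP offsets at an instruction:
//   std::pair<int, int> State = *PSPT.getStateBefore(Inst);
// The prediction folds pending WorklistItem pushes/pops into the transfer
// function, so insertion points computed against the future stack layout
// stay consistent with the code that will actually be emitted.
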
void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
         ++InstIter) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
                                 Reg, SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &*InstIter;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                         const FrameIndexEntry &FIE,
                                         bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                          FIE.StackOffset - SPVal, FIE.RegOrImm,
                                          FIE.Size)) {
        errs() << "createRestoreFromStack: not supported on this platform\n";
        abort();
      }
    } else {
      if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size)) {
        errs() << "createSaveToStack: not supported on this platform\n";
        abort();
      }
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                        FIE.StackOffset - FPVal, FIE.RegOrImm,
                                        FIE.Size)) {
      errs() << "createRestoreFromStack: not supported on this platform\n";
      abort();
    }
  } else {
    if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size)) {
      errs() << "createSaveToStack: not supported on this platform\n";
      abort();
    }
  }
  return NewInst;
}

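// Rebase a single re-inserted CFI to NewOffset. Only CFA-defining operations
// carry an offset that must be mutated; other CFIs pass through unchanged.
// UpdatedCFIs guards against rebasing the same CFI twice.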
void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

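// Replay, at the new location Pos, the CFIs that were deleted along with
// Reg's original push (isPush == true) or pop: re-add the saved CFI pseudos
// rebased to NewOffset and, if an offset CFI for Reg was removed, emit a new
// reg-offset CFI after a push or a same-value CFI after a pop. Returns the
// iterator past the last CFI inserted.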
BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

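// Materialize one scheduled save/restore at InsertionPoint. If this CSR had
// CFIs deleted, record the SP offset of the new access in
// PushOffsetByReg/PopOffsetByReg so rebuildCFI() later reconstructs its CFI,
// then insert the instruction built by createStackAccess (appending to the
// block when InsertionPoint is end()).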
BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                          BinaryBasicBlock *CurBB,
                                          const WorklistItem &Item,
                                          int64_t SPVal, int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  MCInst NewInst =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

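// Apply all insertion items of TodoList at InsertionPoint, keeping SPVal
// consistent: rewind the effect PSPT already attributed to the planned
// pushes/pops when inserting at the block end, order pops by dominance so
// registers are restored in the reverse order of their saves, and update
// SPVal as each push/pop is emitted.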
BBIterTy ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = false;
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;
    HasInsertions = true;
    break;
  }

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location; we want the SP value as it
  // was before the planned insertions.
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    InsertionPoint =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

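// Walk the function and perform every scheduled insertion, querying
// predictive stack pointer tracking (PSPT) for the SP/FP values that will
// hold at each insertion point once the new instructions exist. Returns
// true if any instruction was inserted.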
bool ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions scheduled to go before an existing instruction.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
    }
    // Process insertions scheduled at the end of the BB.
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
                            SPTState.second);
      Changes = true;
    }
  }
  return Changes;
}

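// Perform the scheduled deletions: erase instructions marked Erase, and
// rewrite pushes/pops marked ChangeToAdjustment into equivalent SP
// adjustments so the stack layout is preserved without the memory access.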
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Are the flags live at this point? If so, request an encoding
          // that does not clobber them.
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

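// Re-synchronize CFI with the rewritten code: for SP-based frames, rebuild
// the def_cfa_offset chain first, then re-emit the saved CFIs of every CSR
// whose push and pop offsets were both recorded by processInsertion.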
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

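// Entry point for running shrink wrapping on BF. Gathers dynamic instruction
// and store counts for the pass statistics, bails out early when there is no
// frame info, when the function is too cold (HotOnly) or when it contains
// ambiguous jump tables, and otherwise drives the full pipeline:
// analyze -> move save/restores -> insert/delete code -> rebuild CFI ->
// fix branches.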
bool ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->isStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could
    // regress dcache misses). Moreover, ambiguous JTs are rare and usually
    // come from hand-written assembly. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  if (!processInsertions())
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

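// Print aggregate results accumulated over all functions processed by the
// pass, including the moved and failed spill executions as a fraction of all
// dynamic instructions and of dynamic stores.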
void ShrinkWrapping::printStats() {
  outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
         << " spills inserting load/stores and " << SpillsMovedPushPopMode
         << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
         << " store executions ("
         << format("%.1lf%%",
                   (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
         << " total instructions executed, "
         << format("%.1lf%%",
                   (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
         << " store instructions)\n";
  outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
         << SpillsFailedDynamicCount << " store executions ("
         << format("%.1lf%%",
                   (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
         << " total instructions executed, "
         << format("%.1lf%%",
                   (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
         << " store instructions)\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm