forked from OSchip/llvm-project

[Hexagon] Break up DAG mutations into separate classes, move to subtarget

llvm-svn: 311895

parent 697297afa9
commit 95da97ec56
@@ -12,11 +12,9 @@
//
//===----------------------------------------------------------------------===//

#include "HexagonInstrInfo.h"
#include "HexagonMachineScheduler.h"
#include "HexagonSubtarget.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/IR/Function.h"

#include <iomanip>
@@ -25,9 +23,6 @@
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
    cl::Hidden, cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
    cl::Hidden, cl::ZeroOrMore, cl::init(true));

static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
    cl::Hidden, cl::ZeroOrMore, cl::init(1));
@@ -40,9 +35,6 @@ static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
static cl::opt<bool> DisableTCTie("disable-tc-tie",
    cl::Hidden, cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
    cl::Hidden, cl::ZeroOrMore, cl::init(true));

// Check if the scheduler should penalize instructions that are available
// too early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
@@ -52,77 +44,6 @@ using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
      const SUnit &Inst1, const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
      Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM)
    return true;
  return false;
}

void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
  SUnit* LastSequentialCall = nullptr;
  unsigned VRegHoldingRet = 0;
  unsigned RetRegister;
  SUnit* LastUseOfRet = nullptr;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when compare gets scheduled
  // before preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies between two calls, which are caused by
    // both the return value and the argument for the next call being in %R0.
    // Example:
    //   1: <call1>
    //   2: %VregX = COPY %R0
    //   3: <use of %VregX>
    //   4: %R0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this. The same applies for %D0 and %V0/%W0, which are also handled.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
                           MI->readsRegister(Hexagon::V0, &TRI))) {
        // %vregX = COPY %R0
        VRegHoldingRet = MI->getOperand(0).getReg();
        RetRegister = MI->getOperand(1).getReg();
        LastUseOfRet = nullptr;
      } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
        // <use of %vregX>
        LastUseOfRet = &DAG->SUnits[su];
      else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
        // %R0 = ...
        DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
    }
  }
}

/// Save the last formed packet
void VLIWResourceModel::savePacket() {
  OldPacket = Packet;
@@ -249,14 +249,6 @@ protected:
#endif
};

class HexagonCallMutation : public ScheduleDAGMutation {
public:
  void apply(ScheduleDAGInstrs *DAG) override;
private:
  bool shouldTFRICallBind(const HexagonInstrInfo &HII,
        const SUnit &Inst1, const SUnit &Inst2) const;
};

} // namespace

#endif
@@ -87,6 +87,13 @@ static cl::opt<bool> EnablePredicatedCalls("hexagon-pred-calls",
    cl::Hidden, cl::ZeroOrMore, cl::init(false),
    cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
    cl::Hidden, cl::ZeroOrMore, cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
    cl::Hidden, cl::ZeroOrMore, cl::init(true));

void HexagonSubtarget::initializeEnvironment() {
  UseMemOps = false;
  ModeIEEERndNear = false;
@@ -126,6 +133,121 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  return *this;
}

void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }
}
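
(Annotation, not part of the diff.) UsrOverflowMutation drops output dependences on USR_OVF, the overflow bit in the user status register, presumably because the bit is sticky (writers only ever set it), so the order between writers does not matter and the edges merely serialize packets. Note the two-phase shape: SUnit::removePred() edits the very Preds list being walked, so offending edges are collected first and removed afterwards. A minimal sketch of that idiom; erasePredsIf and Pred are made-up names, not LLVM API:

    // Hypothetical helper using the same scan-then-mutate idiom as above.
    static void erasePredsIf(llvm::SUnit &SU,
                             llvm::function_ref<bool(const llvm::SDep &)> Pred) {
      llvm::SmallVector<llvm::SDep, 4> ToRemove;
      for (const llvm::SDep &D : SU.Preds)   // read-only pass over the edges
        if (Pred(D))                         // e.g. output dep on USR_OVF
          ToRemove.push_back(D);
      for (const llvm::SDep &D : ToRemove)   // safe to mutate once the scan ends
        SU.removePred(D);
    }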

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}
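
(Annotation, mine.) The inner loop above relies on an invariant of the ScheduleDAG: each edge is stored twice, once in the source node's Succs list and once in the sink's Preds list, each copy with its own latency, so a change has to be mirrored or the two views disagree; the dirty-bit calls invalidate the cached critical-path heights and depths. Condensed into a sketch, with setOrderLatency being a made-up name:

    static void setOrderLatency(llvm::SUnit &Src, llvm::SDep &SuccEdge,
                                unsigned Lat) {
      llvm::SUnit *Dst = SuccEdge.getSUnit();
      SuccEdge.setLatency(Lat);     // the copy stored in Src.Succs
      Src.setHeightDirty();         // cached height of Src is now stale
      for (llvm::SDep &PredEdge : Dst->Preds)
        if (PredEdge.getSUnit() == &Src &&
            PredEdge.getKind() == llvm::SDep::Order) {
          PredEdge.setLatency(Lat); // the mirror copy stored in Dst.Preds
          Dst->setDepthDirty();     // cached depth of Dst is now stale
        }
    }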

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
      const HexagonInstrInfo &HII, const SUnit &Inst1,
      const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}

void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) {
  SUnit* LastSequentialCall = nullptr;
  unsigned VRegHoldingRet = 0;
  unsigned RetRegister;
  SUnit* LastUseOfRet = nullptr;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when compare gets scheduled
  // before preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies between two calls, which are caused by
    // both the return value and the argument for the next call being in %R0.
    // Example:
    //   1: <call1>
    //   2: %VregX = COPY %R0
    //   3: <use of %VregX>
    //   4: %R0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this. The same applies for %D0 and %V0/%W0, which are also handled.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
                           MI->readsRegister(Hexagon::V0, &TRI))) {
        // %vregX = COPY %R0
        VRegHoldingRet = MI->getOperand(0).getReg();
        RetRegister = MI->getOperand(1).getReg();
        LastUseOfRet = nullptr;
      } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
        // <use of %vregX>
        LastUseOfRet = &DAG->SUnits[su];
      else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
        // %R0 = ...
        DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
    }
  }
}

HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
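
(Annotation, mine.) Every constraint CallMutation adds is an SDep::Barrier edge: an artificial, ordering-only dependence that pins one node after another without claiming that any register or memory value flows between them. Stripped of the pattern matching, the mechanism reduces to this, where pinAfter is a made-up name:

    static void pinAfter(llvm::SUnit &Later, llvm::SUnit &Earlier) {
      // Ordering-only edge: Later may not be scheduled above Earlier.
      Later.addPred(llvm::SDep(&Earlier, llvm::SDep::Barrier));
    }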
@@ -204,59 +326,16 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
  updateLatency(*SrcInst, *DstInst, Dep);
}

void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) {
  for (auto &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }

  for (auto &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (auto &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (auto &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}

void HexagonSubtarget::getPostRAMutations(
      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(
      llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>());
  Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
  Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
}

void HexagonSubtarget::getSMSMutations(
      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(
      llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>());
  Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
  Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
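
(Annotation, mine.) getPostRAMutations and getSMSMutations are the generic TargetSubtargetInfo extension points; a scheduling pass that wants the subtarget's mutations does roughly the following with the DAG it has just built, and the software pipeliner consumes getSMSMutations the same way:

    std::vector<std::unique_ptr<llvm::ScheduleDAGMutation>> Mutations;
    MF.getSubtarget().getPostRAMutations(Mutations);  // filled by the hook above
    for (auto &M : Mutations)
      M->apply(DAG);  // DAG is the already-constructed ScheduleDAGInstrs *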
@@ -56,10 +56,18 @@ public:
  /// default for V60.
  bool UseBSBScheduling;

  class HexagonDAGMutation : public ScheduleDAGMutation {
  public:
  struct UsrOverflowMutation : public ScheduleDAGMutation {
    void apply(ScheduleDAGInstrs *DAG) override;
  };
  struct HVXMemLatencyMutation : public ScheduleDAGMutation {
    void apply(ScheduleDAGInstrs *DAG) override;
  };
  struct CallMutation : public ScheduleDAGMutation {
    void apply(ScheduleDAGInstrs *DAG) override;
  private:
    bool shouldTFRICallBind(const HexagonInstrInfo &HII,
          const SUnit &Inst1, const SUnit &Inst2) const;
  };

private:
  std::string CPUString;
@@ -102,8 +102,9 @@ int HexagonTargetMachineModule = 0;
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
  DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
  DAG->addMutation(make_unique<HexagonCallMutation>());
  DAG->addMutation(make_unique<HexagonSubtarget::UsrOverflowMutation>());
  DAG->addMutation(make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
  DAG->addMutation(make_unique<HexagonSubtarget::CallMutation>());
  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}
@@ -103,7 +103,8 @@ HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
  HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
  HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();

  addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
  addMutation(make_unique<HexagonSubtarget::UsrOverflowMutation>());
  addMutation(make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
}

// Check if FirstI modifies a register that SecondI reads.
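
(Closing observation, mine, not stated in the commit.) CallMutation tracks virtual registers (VRegHoldingRet), so it is only meaningful before register allocation; accordingly it is registered solely in the pre-RA VLIW scheduler, while the packetizer here and the post-RA/SMS hooks get just the two machine-level mutations:

    // pre-RA VLIW scheduler: UsrOverflowMutation, HVXMemLatencyMutation,
    //                        CallMutation, copy-constrain
    // post-RA + SMS hooks:   UsrOverflowMutation, HVXMemLatencyMutation
    // packetizer:            UsrOverflowMutation, HVXMemLatencyMutation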