[Hexagon] Break up DAG mutations into separate classes, move to subtarget

llvm-svn: 311895
This commit is contained in:
Krzysztof Parzyszek 2017-08-28 16:24:22 +00:00
parent 697297afa9
commit 95da97ec56
6 changed files with 141 additions and 139 deletions

View File

@ -12,11 +12,9 @@
//
//===----------------------------------------------------------------------===//
#include "HexagonInstrInfo.h"
#include "HexagonMachineScheduler.h"
#include "HexagonSubtarget.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/IR/Function.h"
#include <iomanip>
@ -25,9 +23,6 @@
// Hidden boolean command-line flag "ignore-bb-reg-pressure"; initial value is
// false. Its consumer is not visible in this excerpt.
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
// Hidden boolean command-line flag "sched-preds-closer"; initial value is
// true. It gates the A2_tfrpi binding branch of the call mutation's apply().
static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
// Hidden unsigned command-line option "misched-verbose-level"; initial value
// is 1. Its consumer is not visible in this excerpt.
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
cl::Hidden, cl::ZeroOrMore, cl::init(1));
@ -40,9 +35,6 @@ static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
// Hidden boolean command-line flag "disable-tc-tie"; initial value is false.
// Its consumer is not visible in this excerpt.
static cl::opt<bool> DisableTCTie("disable-tc-tie",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
// Hidden boolean command-line flag "sched-retval-optimization"; initial value
// is true. It gates the return-value-copy branch of the call mutation's
// apply().
static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
@ -52,77 +44,6 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
// Decide whether an A2_tfrpi should stay bound to the instruction before it.
//
// The intent is to keep the TFRI next to its consumer, i.e. the instruction
// that immediately follows it. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call when
// they are not used until after the call. The situation is made worse by
// the pair being allocated from the callee-saves list, which leads to
// excess spills and restores.
//
// Returns true only if Inst1 is an A2_tfrpi and Inst2 has one of the
// instruction types TypeS_2op, TypeS_3op, TypeALU64 or TypeM.
bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
      const SUnit &Inst1, const SUnit &Inst2) const {
  // Anything other than A2_tfrpi in the first slot is of no interest.
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // NOTE(review): the previous comment here mentioned TypeXTYPE, which is
  // not one of the types tested below - confirm which wording is intended.
  unsigned ConsumerType = HII.getType(*Inst2.getInstr());
  switch (ConsumerType) {
  case HexagonII::TypeS_2op:
  case HexagonII::TypeS_3op:
  case HexagonII::TypeALU64:
  case HexagonII::TypeM:
    return true;
  default:
    return false;
  }
}
// Walk the scheduling units of DAG in index order and add Barrier
// predecessor edges in three situations, tested as one if/else-if chain so
// that at most one of them applies to any given unit:
//   1. a compare that follows a call is made to depend on that call;
//   2. an A2_tfrpi accepted by shouldTFRICallBind() is made to depend on the
//      unit immediately before it (only when SchedPredsCloser is set);
//   3. a redefinition of the return register is made to depend on the last
//      use of the virtual register the return value was copied into (only
//      when SchedRetvalOptimization is set).
// Calls themselves are only recorded; no edge is added for them.
void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
// Most recent call seen so far, or null before the first call.
SUnit* LastSequentialCall = nullptr;
// Destination of the most recent COPY that reads R0/V0; 0 means none yet.
unsigned VRegHoldingRet = 0;
// Source operand of that COPY. Only assigned together with VRegHoldingRet,
// and only read once LastUseOfRet is non-null.
unsigned RetRegister;
// Most recent unit that reads VRegHoldingRet; reset on every new COPY.
SUnit* LastUseOfRet = nullptr;
auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
// Currently we only catch the situation when compare gets scheduled
// before preceding call.
for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
// Remember the call.
if (DAG->SUnits[su].getInstr()->isCall())
LastSequentialCall = &DAG->SUnits[su];
// Look for a compare that defines a predicate.
else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
// Look for call and tfri* instructions.
// The index bounds keep both SUnits[su-1] and SUnits[su+1] in range.
else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
// Prevent redundant register copies between two calls, which are caused by
// both the return value and the argument for the next call being in %R0.
// Example:
// 1: <call1>
// 2: %VregX = COPY %R0
// 3: <use of %VregX>
// 4: %R0 = ...
// 5: <call2>
// The scheduler would often swap 3 and 4, so an additional register is
// needed. This code inserts a Barrier dependence between 3 & 4 to prevent
// this. The same applies for %D0 and %V0/%W0, which are also handled.
else if (SchedRetvalOptimization) {
const MachineInstr *MI = DAG->SUnits[su].getInstr();
if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
MI->readsRegister(Hexagon::V0, &TRI))) {
// %vregX = COPY %R0
VRegHoldingRet = MI->getOperand(0).getReg();
RetRegister = MI->getOperand(1).getReg();
LastUseOfRet = nullptr;
} else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
// <use of %vregX>
LastUseOfRet = &DAG->SUnits[su];
else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
// %R0 = ...
DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
}
}
}
/// Save the last formed packet
void VLIWResourceModel::savePacket() {
OldPacket = Packet;

View File

@ -249,14 +249,6 @@ protected:
#endif
};
/// DAG mutation that adds Barrier dependences around calls: it ties compares
/// to the preceding call, optionally binds A2_tfrpi instructions to the unit
/// before them, and optionally orders redefinitions of the return register
/// after the last use of the copied return value.
class HexagonCallMutation : public ScheduleDAGMutation {
public:
/// Apply the mutation to every scheduling unit of \p DAG.
void apply(ScheduleDAGInstrs *DAG) override;
private:
/// Return true if \p Inst1 is an A2_tfrpi and \p Inst2 has one of the
/// instruction types that apply() treats as its consumer.
bool shouldTFRICallBind(const HexagonInstrInfo &HII,
const SUnit &Inst1, const SUnit &Inst2) const;
};
} // namespace
#endif

View File

@ -87,6 +87,13 @@ static cl::opt<bool> EnablePredicatedCalls("hexagon-pred-calls",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Consider calls to be predicable"));
// Hidden boolean command-line flag "sched-preds-closer"; initial value is
// true. When set, CallMutation::apply() may add a Barrier edge that binds an
// A2_tfrpi to the scheduling unit immediately before it.
static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
// Hidden boolean command-line flag "sched-retval-optimization"; initial value
// is true. When set, CallMutation::apply() tracks copies of the return
// register and orders its redefinition after the last use of the copy.
static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
void HexagonSubtarget::initializeEnvironment() {
UseMemOps = false;
ModeIEEERndNear = false;
@ -126,6 +133,121 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
return *this;
}
/// Drop, from every instruction node of \p DAG, the predecessor edges that
/// are output dependences on Hexagon::USR_OVF.
void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &Node : DAG->SUnits) {
    if (Node.isInstr()) {
      // Collect first, remove afterwards: removePred() edits the very list
      // that is being walked here.
      SmallVector<SDep, 4> Doomed;
      for (auto &Edge : Node.Preds) {
        // The kind is tested before the register is queried, as before.
        if (Edge.getKind() != SDep::Output)
          continue;
        if (Edge.getReg() == Hexagon::USR_OVF)
          Doomed.push_back(Edge);
      }
      for (auto &Edge : Doomed)
        Node.removePred(Edge);
    }
  }
}
/// Update the latency of chain edges between v60 vector load or store
/// instructions to be 1. These instructions cannot be scheduled in the same
/// packet.
///
/// Only zero-latency Order edges are touched, and only when both ends are
/// HVX vector instructions of the same kind (store/store or load/load).
void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  auto *HII = static_cast<const HexagonInstrInfo*>(DAG->TII);

  for (SUnit &Src : DAG->SUnits) {
    MachineInstr &SrcMI = *Src.getInstr();
    const bool SrcStores = SrcMI.mayStore();
    const bool SrcLoads = SrcMI.mayLoad();
    if (!HII->isHVXVec(SrcMI))
      continue;
    if (!SrcStores && !SrcLoads)
      continue;

    for (SDep &Out : Src.Succs) {
      const bool IsZeroLatencyOrder =
          Out.getKind() == SDep::Order && Out.getLatency() == 0;
      if (!IsZeroLatencyOrder)
        continue;

      SUnit *Dst = Out.getSUnit();
      MachineInstr &DstMI = *Dst->getInstr();
      if (!HII->isHVXVec(DstMI))
        continue;

      const bool SameKind = (SrcStores && DstMI.mayStore()) ||
                            (SrcLoads && DstMI.mayLoad());
      if (!SameKind)
        continue;

      Out.setLatency(1);
      Src.setHeightDirty();

      // Change the dependence in the opposite direction too.
      for (SDep &In : Dst->Preds) {
        if (In.getSUnit() == &Src && In.getKind() == SDep::Order) {
          In.setLatency(1);
          Dst->setDepthDirty();
        }
      }
    }
  }
}
// Decide whether an A2_tfrpi should keep its scheduling affinity with the
// instruction before it.
//
// We want the TFRI to be consumed by the very next instruction. This should
// help reduce the instances of double register pairs being allocated and
// scheduled before a call when they are not used until after the call; the
// situation is exacerbated by the pair being allocated from the callee-saves
// list, leading to excess spills and restores.
//
// Returns true only when Inst1 is an A2_tfrpi and the type of Inst2 is one
// of TypeS_2op, TypeS_3op, TypeALU64 or TypeM.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
      const HexagonInstrInfo &HII, const SUnit &Inst1,
      const SUnit &Inst2) const {
  const bool FirstIsTfrpi =
      Inst1.getInstr()->getOpcode() == Hexagon::A2_tfrpi;
  if (!FirstIsTfrpi)
    return false;

  // NOTE(review): the previous comment here mentioned TypeXTYPE, which is
  // not one of the types tested below - confirm which wording is intended.
  unsigned ConsumerType = HII.getType(*Inst2.getInstr());
  const bool IsSType = ConsumerType == HexagonII::TypeS_2op ||
                       ConsumerType == HexagonII::TypeS_3op;
  const bool IsALU64OrM = ConsumerType == HexagonII::TypeALU64 ||
                          ConsumerType == HexagonII::TypeM;
  return IsSType || IsALU64OrM;
}
/// Walk the scheduling units of \p DAG in index order and add Barrier
/// predecessor edges in three situations. They are tested as a single
/// if/else-if chain, so at most one applies to any given unit:
///   1. a compare that follows a call is made to depend on that call;
///   2. an A2_tfrpi accepted by shouldTFRICallBind() is made to depend on
///      the unit immediately before it (only when SchedPredsCloser is set);
///   3. a redefinition of the return register is made to depend on the last
///      use of the virtual register the return value was copied into (only
///      when SchedRetvalOptimization is set).
/// Calls themselves are only recorded; no edge is added for them.
void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) {
  // Most recent call seen so far, or null before the first call.
  SUnit *LastSequentialCall = nullptr;
  // Destination of the most recent COPY that reads R0/V0; 0 means none yet.
  unsigned VRegHoldingRet = 0;
  // Source operand of that COPY. It is only read once LastUseOfRet is set,
  // which cannot happen before the COPY branch has assigned it. It is
  // initialized anyway so the value is never indeterminate and the code does
  // not rely on that coupling to stay well-defined.
  unsigned RetRegister = 0;
  // Most recent unit that reads VRegHoldingRet; reset on every new COPY.
  SUnit *LastUseOfRet = nullptr;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when compare gets scheduled
  // before preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // addPred() does not resize DAG->SUnits, so this reference stays valid
    // for the whole iteration.
    SUnit &SU = DAG->SUnits[su];
    const MachineInstr *MI = SU.getInstr();

    // Remember the call.
    if (MI->isCall())
      LastSequentialCall = &SU;
    // Look for a compare that defines a predicate.
    else if (MI->isCompare() && LastSequentialCall)
      SU.addPred(SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions. The index bounds keep both
    // SUnits[su-1] and SUnits[su+1] in range.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, SU, DAG->SUnits[su+1]))
      SU.addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies between two calls, which are caused
    // by both the return value and the argument for the next call being in
    // %R0.
    // Example:
    //   1: <call1>
    //   2: %VregX = COPY %R0
    //   3: <use of %VregX>
    //   4: %R0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to
    // prevent this. The same applies for %D0 and %V0/%W0, which are also
    // handled.
    else if (SchedRetvalOptimization) {
      if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
                           MI->readsRegister(Hexagon::V0, &TRI))) {
        // %vregX = COPY %R0
        VRegHoldingRet = MI->getOperand(0).getReg();
        RetRegister = MI->getOperand(1).getReg();
        LastUseOfRet = nullptr;
      } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
        // <use of %vregX>
        LastUseOfRet = &SU;
      else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
        // %R0 = ...
        SU.addPred(SDep(LastUseOfRet, SDep::Barrier));
    }
  }
}
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
@ -204,59 +326,16 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
updateLatency(*SrcInst, *DstInst, Dep);
}
/// Apply two independent adjustments to \p DAG, one full pass each:
///   1. remove predecessor edges that are output dependences on
///      Hexagon::USR_OVF from every instruction node;
///   2. raise to 1 the latency of zero-latency Order edges between HVX
///      vector instructions of the same kind (store/store or load/load).
void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) {
  // Pass 1: USR_OVF output dependences.
  for (auto &Node : DAG->SUnits) {
    if (Node.isInstr()) {
      // Collect first, remove afterwards: removePred() edits the very list
      // that is being walked here.
      SmallVector<SDep, 4> Doomed;
      for (auto &Edge : Node.Preds) {
        if (Edge.getKind() != SDep::Output)
          continue;
        if (Edge.getReg() == Hexagon::USR_OVF)
          Doomed.push_back(Edge);
      }
      for (auto &Edge : Doomed)
        Node.removePred(Edge);
    }
  }

  // Pass 2: update the latency of chain edges between v60 vector load or
  // store instructions to be 1. These instructions cannot be scheduled in
  // the same packet.
  auto *HII = static_cast<const HexagonInstrInfo*>(DAG->TII);
  for (auto &Src : DAG->SUnits) {
    MachineInstr &SrcMI = *Src.getInstr();
    const bool SrcStores = SrcMI.mayStore();
    const bool SrcLoads = SrcMI.mayLoad();
    if (!HII->isHVXVec(SrcMI))
      continue;
    if (!SrcStores && !SrcLoads)
      continue;

    for (auto &Out : Src.Succs) {
      const bool IsZeroLatencyOrder =
          Out.getKind() == SDep::Order && Out.getLatency() == 0;
      if (!IsZeroLatencyOrder)
        continue;

      SUnit *Dst = Out.getSUnit();
      MachineInstr &DstMI = *Dst->getInstr();
      if (!HII->isHVXVec(DstMI))
        continue;

      const bool SameKind = (SrcStores && DstMI.mayStore()) ||
                            (SrcLoads && DstMI.mayLoad());
      if (!SameKind)
        continue;

      Out.setLatency(1);
      Src.setHeightDirty();

      // Change the dependence in the opposite direction too.
      for (auto &In : Dst->Preds) {
        if (In.getSUnit() == &Src && In.getKind() == SDep::Order) {
          In.setLatency(1);
          Dst->setDepthDirty();
        }
      }
    }
  }
}
// Append this subtarget's DAG mutations to Mutations, in the order of the
// push_back calls below. Existing entries of Mutations are left in place.
void HexagonSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(
llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>());
Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
}
// Append this subtarget's DAG mutations to Mutations, in the order of the
// push_back calls below. The set is the same one getPostRAMutations() adds.
void HexagonSubtarget::getSMSMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(
llvm::make_unique<HexagonSubtarget::HexagonDAGMutation>());
Mutations.push_back(llvm::make_unique<UsrOverflowMutation>());
Mutations.push_back(llvm::make_unique<HVXMemLatencyMutation>());
}
// Pin the vtable to this file.

View File

@ -56,10 +56,18 @@ public:
/// default for V60.
bool UseBSBScheduling;
class HexagonDAGMutation : public ScheduleDAGMutation {
public:
/// DAG mutation that removes, from every instruction node, predecessor edges
/// that are output dependences on Hexagon::USR_OVF.
struct UsrOverflowMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override;
};
/// DAG mutation that raises to 1 the latency of zero-latency Order edges
/// between HVX vector instructions of the same kind (store/store or
/// load/load), in both the successor and the predecessor lists.
struct HVXMemLatencyMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override;
};
/// DAG mutation that adds Barrier dependences around calls: it ties compares
/// to the preceding call, optionally binds A2_tfrpi instructions to the unit
/// before them, and optionally orders redefinitions of the return register
/// after the last use of the copied return value.
struct CallMutation : public ScheduleDAGMutation {
void apply(ScheduleDAGInstrs *DAG) override;
private:
/// Return true if \p Inst1 is an A2_tfrpi and \p Inst2 has one of the
/// instruction types that apply() treats as its consumer.
bool shouldTFRICallBind(const HexagonInstrInfo &HII,
const SUnit &Inst1, const SUnit &Inst2) const;
};
private:
std::string CPUString;

View File

@ -102,8 +102,9 @@ int HexagonTargetMachineModule = 0;
// Build a VLIWMachineScheduler that uses a ConvergingVLIWScheduler strategy,
// attach the DAG mutations listed below to it (in that order), and return it.
// NOTE(review): the scheduler is allocated with `new` and returned as a raw
// pointer; presumably the caller takes ownership - confirm.
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
DAG->addMutation(make_unique<HexagonCallMutation>());
DAG->addMutation(make_unique<HexagonSubtarget::UsrOverflowMutation>());
DAG->addMutation(make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
DAG->addMutation(make_unique<HexagonSubtarget::CallMutation>());
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}

View File

@ -103,7 +103,8 @@ HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
addMutation(make_unique<HexagonSubtarget::UsrOverflowMutation>());
addMutation(make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
}
// Check if FirstI modifies a register that SecondI reads.