From 95da97ec563e5f93c0d065a9a65dfbf40f9c218d Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 28 Aug 2017 16:24:22 +0000 Subject: [PATCH] [Hexagon] Break up DAG mutations into separate classes, move to subtarget llvm-svn: 311895 --- .../Hexagon/HexagonMachineScheduler.cpp | 79 -------- .../Target/Hexagon/HexagonMachineScheduler.h | 8 - llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 173 +++++++++++++----- llvm/lib/Target/Hexagon/HexagonSubtarget.h | 12 +- .../Target/Hexagon/HexagonTargetMachine.cpp | 5 +- .../Target/Hexagon/HexagonVLIWPacketizer.cpp | 3 +- 6 files changed, 141 insertions(+), 139 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 375a64de7f55..6a252df7fc9a 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -12,11 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "HexagonInstrInfo.h" #include "HexagonMachineScheduler.h" #include "HexagonSubtarget.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/IR/Function.h" #include @@ -25,9 +23,6 @@ static cl::opt IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, cl::ZeroOrMore, cl::init(false)); -static cl::opt SchedPredsCloser("sched-preds-closer", - cl::Hidden, cl::ZeroOrMore, cl::init(true)); - static cl::opt SchedDebugVerboseLevel("misched-verbose-level", cl::Hidden, cl::ZeroOrMore, cl::init(1)); @@ -40,9 +35,6 @@ static cl::opt BotUseShorterTie("bot-use-shorter-tie", static cl::opt DisableTCTie("disable-tc-tie", cl::Hidden, cl::ZeroOrMore, cl::init(false)); -static cl::opt SchedRetvalOptimization("sched-retval-optimization", - cl::Hidden, cl::ZeroOrMore, cl::init(true)); - // Check if the scheduler should penalize instructions that are available to // early due to a zero-latency dependence. static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, @@ -52,77 +44,6 @@ using namespace llvm; #define DEBUG_TYPE "machine-scheduler" -// Check if a call and subsequent A2_tfrpi instructions should maintain -// scheduling affinity. We are looking for the TFRI to be consumed in -// the next instruction. This should help reduce the instances of -// double register pairs being allocated and scheduled before a call -// when not used until after the call. This situation is exacerbated -// by the fact that we allocate the pair from the callee saves list, -// leading to excess spills and restores. -bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII, - const SUnit &Inst1, const SUnit &Inst2) const { - if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) - return false; - - // TypeXTYPE are 64 bit operations. - unsigned Type = HII.getType(*Inst2.getInstr()); - if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op || - Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM) - return true; - return false; -} - -void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) { - SUnit* LastSequentialCall = nullptr; - unsigned VRegHoldingRet = 0; - unsigned RetRegister; - SUnit* LastUseOfRet = nullptr; - auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); - auto &HII = *DAG->MF.getSubtarget().getInstrInfo(); - - // Currently we only catch the situation when compare gets scheduled - // before preceding call. - for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { - // Remember the call. - if (DAG->SUnits[su].getInstr()->isCall()) - LastSequentialCall = &DAG->SUnits[su]; - // Look for a compare that defines a predicate. - else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) - DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); - // Look for call and tfri* instructions. - else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && - shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) - DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); - // Prevent redundant register copies between two calls, which are caused by - // both the return value and the argument for the next call being in %R0. - // Example: - // 1: - // 2: %VregX = COPY %R0 - // 3: - // 4: %R0 = ... - // 5: - // The scheduler would often swap 3 and 4, so an additional register is - // needed. This code inserts a Barrier dependence between 3 & 4 to prevent - // this. The same applies for %D0 and %V0/%W0, which are also handled. - else if (SchedRetvalOptimization) { - const MachineInstr *MI = DAG->SUnits[su].getInstr(); - if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || - MI->readsRegister(Hexagon::V0, &TRI))) { - // %vregX = COPY %R0 - VRegHoldingRet = MI->getOperand(0).getReg(); - RetRegister = MI->getOperand(1).getReg(); - LastUseOfRet = nullptr; - } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) - // - LastUseOfRet = &DAG->SUnits[su]; - else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) - // %R0 = ... - DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); - } - } -} - - /// Save the last formed packet void VLIWResourceModel::savePacket() { OldPacket = Packet; diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h index 0af29c22765d..935bcc9f8292 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -249,14 +249,6 @@ protected: #endif }; -class HexagonCallMutation : public ScheduleDAGMutation { -public: - void apply(ScheduleDAGInstrs *DAG) override; -private: - bool shouldTFRICallBind(const HexagonInstrInfo &HII, - const SUnit &Inst1, const SUnit &Inst2) const; -}; - } // namespace #endif diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 0aada8a53c97..d9fa3b2548e3 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -87,6 +87,13 @@ static cl::opt EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Consider calls to be predicable")); +static cl::opt SchedPredsCloser("sched-preds-closer", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt SchedRetvalOptimization("sched-retval-optimization", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + + void HexagonSubtarget::initializeEnvironment() { UseMemOps = false; ModeIEEERndNear = false; @@ -126,6 +133,121 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { return *this; } +void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) { + for (SUnit &SU : DAG->SUnits) { + if (!SU.isInstr()) + continue; + SmallVector Erase; + for (auto &D : SU.Preds) + if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF) + Erase.push_back(D); + for (auto &E : Erase) + SU.removePred(E); + } +} + +void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) { + for (SUnit &SU : DAG->SUnits) { + // Update the latency of chain edges between v60 vector load or store + // instructions to be 1. These instruction cannot be scheduled in the + // same packet. + MachineInstr &MI1 = *SU.getInstr(); + auto *QII = static_cast(DAG->TII); + bool IsStoreMI1 = MI1.mayStore(); + bool IsLoadMI1 = MI1.mayLoad(); + if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1)) + continue; + for (SDep &SI : SU.Succs) { + if (SI.getKind() != SDep::Order || SI.getLatency() != 0) + continue; + MachineInstr &MI2 = *SI.getSUnit()->getInstr(); + if (!QII->isHVXVec(MI2)) + continue; + if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) { + SI.setLatency(1); + SU.setHeightDirty(); + // Change the dependence in the opposite direction too. + for (SDep &PI : SI.getSUnit()->Preds) { + if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order) + continue; + PI.setLatency(1); + SI.getSUnit()->setDepthDirty(); + } + } + } + } +} + +// Check if a call and subsequent A2_tfrpi instructions should maintain +// scheduling affinity. We are looking for the TFRI to be consumed in +// the next instruction. This should help reduce the instances of +// double register pairs being allocated and scheduled before a call +// when not used until after the call. This situation is exacerbated +// by the fact that we allocate the pair from the callee saves list, +// leading to excess spills and restores. +bool HexagonSubtarget::CallMutation::shouldTFRICallBind( + const HexagonInstrInfo &HII, const SUnit &Inst1, + const SUnit &Inst2) const { + if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) + return false; + + // TypeXTYPE are 64 bit operations. + unsigned Type = HII.getType(*Inst2.getInstr()); + return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op || + Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM; +} + +void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) { + SUnit* LastSequentialCall = nullptr; + unsigned VRegHoldingRet = 0; + unsigned RetRegister; + SUnit* LastUseOfRet = nullptr; + auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); + auto &HII = *DAG->MF.getSubtarget().getInstrInfo(); + + // Currently we only catch the situation when compare gets scheduled + // before preceding call. + for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { + // Remember the call. + if (DAG->SUnits[su].getInstr()->isCall()) + LastSequentialCall = &DAG->SUnits[su]; + // Look for a compare that defines a predicate. + else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) + DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + // Look for call and tfri* instructions. + else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && + shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) + DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); + // Prevent redundant register copies between two calls, which are caused by + // both the return value and the argument for the next call being in %R0. + // Example: + // 1: + // 2: %VregX = COPY %R0 + // 3: + // 4: %R0 = ... + // 5: + // The scheduler would often swap 3 and 4, so an additional register is + // needed. This code inserts a Barrier dependence between 3 & 4 to prevent + // this. The same applies for %D0 and %V0/%W0, which are also handled. + else if (SchedRetvalOptimization) { + const MachineInstr *MI = DAG->SUnits[su].getInstr(); + if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || + MI->readsRegister(Hexagon::V0, &TRI))) { + // %vregX = COPY %R0 + VRegHoldingRet = MI->getOperand(0).getReg(); + RetRegister = MI->getOperand(1).getReg(); + LastUseOfRet = nullptr; + } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) + // + LastUseOfRet = &DAG->SUnits[su]; + else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) + // %R0 = ... + DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); + } + } +} + + HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), @@ -204,59 +326,16 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, updateLatency(*SrcInst, *DstInst, Dep); } -void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { - for (auto &SU : DAG->SUnits) { - if (!SU.isInstr()) - continue; - SmallVector Erase; - for (auto &D : SU.Preds) - if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF) - Erase.push_back(D); - for (auto &E : Erase) - SU.removePred(E); - } - - for (auto &SU : DAG->SUnits) { - // Update the latency of chain edges between v60 vector load or store - // instructions to be 1. These instruction cannot be scheduled in the - // same packet. - MachineInstr &MI1 = *SU.getInstr(); - auto *QII = static_cast(DAG->TII); - bool IsStoreMI1 = MI1.mayStore(); - bool IsLoadMI1 = MI1.mayLoad(); - if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1)) - continue; - for (auto &SI : SU.Succs) { - if (SI.getKind() != SDep::Order || SI.getLatency() != 0) - continue; - MachineInstr &MI2 = *SI.getSUnit()->getInstr(); - if (!QII->isHVXVec(MI2)) - continue; - if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) { - SI.setLatency(1); - SU.setHeightDirty(); - // Change the dependence in the opposite direction too. - for (auto &PI : SI.getSUnit()->Preds) { - if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order) - continue; - PI.setLatency(1); - SI.getSUnit()->setDepthDirty(); - } - } - } - } -} - void HexagonSubtarget::getPostRAMutations( std::vector> &Mutations) const { - Mutations.push_back( - llvm::make_unique()); + Mutations.push_back(llvm::make_unique()); + Mutations.push_back(llvm::make_unique()); } void HexagonSubtarget::getSMSMutations( std::vector> &Mutations) const { - Mutations.push_back( - llvm::make_unique()); + Mutations.push_back(llvm::make_unique()); + Mutations.push_back(llvm::make_unique()); } // Pin the vtable to this file. diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 753dca000065..542929d94a70 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -56,10 +56,18 @@ public: /// default for V60. bool UseBSBScheduling; - class HexagonDAGMutation : public ScheduleDAGMutation { - public: + struct UsrOverflowMutation : public ScheduleDAGMutation { void apply(ScheduleDAGInstrs *DAG) override; }; + struct HVXMemLatencyMutation : public ScheduleDAGMutation { + void apply(ScheduleDAGInstrs *DAG) override; + }; + struct CallMutation : public ScheduleDAGMutation { + void apply(ScheduleDAGInstrs *DAG) override; + private: + bool shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const; + }; private: std::string CPUString; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 405f44123efe..85a69c93a95d 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -102,8 +102,9 @@ int HexagonTargetMachineModule = 0; static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new VLIWMachineScheduler(C, make_unique()); - DAG->addMutation(make_unique()); - DAG->addMutation(make_unique()); + DAG->addMutation(make_unique()); + DAG->addMutation(make_unique()); + DAG->addMutation(make_unique()); DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; } diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index a3021e3dfe43..a6d134365a62 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -103,7 +103,8 @@ HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, HII = MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); - addMutation(make_unique()); + addMutation(make_unique()); + addMutation(make_unique()); } // Check if FirstI modifies a register that SecondI reads.