diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 1c36093923ac..5e75cb6a589e 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -59,6 +59,7 @@ add_llvm_target(HexagonCodeGen
   HexagonTargetTransformInfo.cpp
   HexagonVectorLoopCarriedReuse.cpp
   HexagonVectorPrint.cpp
+  HexagonVExtract.cpp
   HexagonVLIWPacketizer.cpp
   RDFCopy.cpp
   RDFDeadCode.cpp
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index df65cfd97a40..4ecfc0753b9b 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -94,6 +94,9 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
   cl::Hidden, cl::ZeroOrMore, cl::init(false),
   cl::desc("Enable Hexagon Vector print instr pass"));
 
+static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
+  cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
+
 static cl::opt<bool> EnableTrapUnreachable("hexagon-trap-unreachable",
   cl::Hidden, cl::ZeroOrMore, cl::init(false),
   cl::desc("Enable generating trap for unreachable"));
@@ -133,6 +136,7 @@ namespace llvm {
   void initializeHexagonOptAddrModePass(PassRegistry&);
   void initializeHexagonPacketizerPass(PassRegistry&);
   void initializeHexagonRDFOptPass(PassRegistry&);
+  void initializeHexagonVExtractPass(PassRegistry&);
 
   Pass *createHexagonLoopIdiomPass();
   Pass *createHexagonVectorLoopCarriedReusePass();
@@ -165,6 +169,7 @@ namespace llvm {
   FunctionPass *createHexagonSplitDoubleRegs();
   FunctionPass *createHexagonStoreWidening();
   FunctionPass *createHexagonVectorPrint();
+  FunctionPass *createHexagonVExtract();
 } // end namespace llvm;
 
 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
@@ -194,6 +199,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
   initializeHexagonOptAddrModePass(PR);
   initializeHexagonPacketizerPass(PR);
   initializeHexagonRDFOptPass(PR);
+  initializeHexagonVExtractPass(PR);
 }
 
 HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -326,6 +332,8 @@ bool HexagonPassConfig::addInstSelector() {
   addPass(createHexagonISelDag(TM, getOptLevel()));
 
   if (!NoOpt) {
+    if (EnableVExtractOpt)
+      addPass(createHexagonVExtract());
     // Create logical operations on predicate registers.
     if (EnableGenPred)
       addPass(createHexagonGenPredicate());
diff --git a/llvm/lib/Target/Hexagon/HexagonVExtract.cpp b/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
new file mode 100644
index 000000000000..67c201d19163
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -0,0 +1,196 @@
+//===- HexagonVExtract.cpp ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass will replace multiple occurrences of V6_extractw from the same
+// vector register with a combination of a vector store and scalar loads.
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/PassSupport.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <map>
+
+using namespace llvm;
+
+// A vector register is rewritten only when more than this many V6_extractw
+// instructions read from it.
+static cl::opt<unsigned> VExtractThreshold("hexagon-vextract-threshold",
+  cl::Hidden, cl::ZeroOrMore, cl::init(1),
+  cl::desc("Threshold for triggering vextract replacement"));
+
+namespace llvm {
+  void initializeHexagonVExtractPass(PassRegistry& Registry);
+  FunctionPass *createHexagonVExtract();
+}
+
+namespace {
+  /// Rewrites groups of V6_extractw that read the same vector register into
+  /// one vector store to a stack slot plus one scalar load per extract.
+  class HexagonVExtract : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonVExtract() : MachineFunctionPass(ID) {}
+
+    StringRef getPassName() const override {
+      return "Hexagon optimize vextract";
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+  private:
+    // Both are refreshed at the top of runOnMachineFunction.
+    const HexagonSubtarget *HST = nullptr;
+    const HexagonInstrInfo *HII = nullptr;
+
+    unsigned genElemLoad(MachineInstr *ExtI, unsigned BaseR,
+                         MachineRegisterInfo &MRI);
+  };
+
+  char HexagonVExtract::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonVExtract, "hexagon-vextract",
+  "Hexagon optimize vextract", false, false)
+
+/// Emit, immediately before \p ExtI, the scalar load that replaces it.
+///
+/// \param ExtI  The V6_extractw being replaced; operand 2 is its index.
+/// \param BaseR Register holding the base address for the load.
+/// \param MRI   Creates the new virtual registers and looks up the
+///              instruction defining the index register.
+/// \returns the virtual register that receives the loaded word.
+unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR,
+                                      MachineRegisterInfo &MRI) {
+  MachineBasicBlock &ExtB = *ExtI->getParent();
+  DebugLoc DL = ExtI->getDebugLoc();
+  unsigned ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+
+  unsigned ExtIdxR = ExtI->getOperand(2).getReg();
+  unsigned ExtIdxS = ExtI->getOperand(2).getSubReg();
+
+  // Simplified check for a compile-time constant value of ExtIdxR.
+  if (ExtIdxS == 0) {
+    MachineInstr *DI = MRI.getVRegDef(ExtIdxR);
+    if (DI != nullptr && DI->getOpcode() == Hexagon::A2_tfrsi) {
+      unsigned V = DI->getOperand(1).getImm();
+      // Mask the constant with (vector length - 1) and clear its low two
+      // bits; the result becomes the immediate offset of the load.
+      V &= (HST->getVectorLength()-1) & -4u;
+
+      BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L2_loadri_io), ElemR)
+        .addReg(BaseR)
+        .addImm(V);
+      return ElemR;
+    }
+  }
+
+  // Otherwise clear the low two bits of the index at run time and use the
+  // result as a register offset from BaseR.
+  unsigned IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+  BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR)
+    .add(ExtI->getOperand(2))
+    .addImm(-4);
+  BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L4_loadri_rr), ElemR)
+    .addReg(BaseR)
+    .addReg(IdxR)
+    .addImm(0);
+  return ElemR;
+}
+
+/// Group every V6_extractw in \p MF by the vector register it reads, then
+/// rewrite each group that has more than VExtractThreshold members.
+/// \returns true if at least one V6_extractw was replaced.
+bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
+  HST = &MF.getSubtarget<HexagonSubtarget>();
+  HII = HST->getInstrInfo();
+  const auto &HRI = *HST->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  std::map<unsigned,SmallVector<MachineInstr*,4>> VExtractMap;
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      unsigned Opc = MI.getOpcode();
+      if (Opc != Hexagon::V6_extractw)
+        continue;
+      unsigned VecR = MI.getOperand(1).getReg();
+      VExtractMap[VecR].push_back(&MI);
+    }
+  }
+
+  for (auto &P : VExtractMap) {
+    unsigned VecR = P.first;
+    if (P.second.size() <= VExtractThreshold)
+      continue;
+
+    const auto &VecRC = *MRI.getRegClass(VecR);
+    int FI = MFI.CreateSpillStackObject(HRI.getSpillSize(VecRC),
+                                        HRI.getSpillAlignment(VecRC));
+    MachineInstr *DefI = MRI.getVRegDef(VecR);
+    MachineBasicBlock &DefB = *DefI->getParent();
+    // Store the vector right after its definition. PHIs have to stay
+    // grouped at the top of a block, so when the definition is a PHI the
+    // store goes after the last PHI instead.
+    MachineBasicBlock::iterator At = std::next(DefI->getIterator());
+    if (DefI->isPHI())
+      At = DefB.getFirstNonPHI();
+    unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID
+                          ? Hexagon::V6_vS32b_ai
+                          : Hexagon::PS_vstorerw_ai;
+    BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addReg(VecR);
+
+    unsigned VecSize = HRI.getRegSizeInBits(VecRC) / 8;
+
+    for (MachineInstr *ExtI : P.second) {
+      assert(ExtI->getOpcode() == Hexagon::V6_extractw);
+      // Compare inside the assert so builds without assertions do not
+      // carry an unused local.
+      assert(ExtI->getOperand(1).getReg() == VecR);
+      unsigned SR = ExtI->getOperand(1).getSubReg();
+
+      MachineBasicBlock &ExtB = *ExtI->getParent();
+      DebugLoc DL = ExtI->getDebugLoc();
+      unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+      // Only the high subregister sits in the upper half of the stored
+      // value; a full register or the low subregister starts at offset 0.
+      BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR)
+        .addFrameIndex(FI)
+        .addImm(SR == Hexagon::vsub_hi ? VecSize/2 : 0);
+
+      unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
+      unsigned ExtR = ExtI->getOperand(0).getReg();
+      MRI.replaceRegWith(ExtR, ElemR);
+      ExtB.erase(ExtI);
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// Create a new instance of the vextract optimization pass.
+FunctionPass *llvm::createHexagonVExtract() {
+  return new HexagonVExtract();
+}
diff --git a/llvm/test/CodeGen/Hexagon/vextract-basic.mir b/llvm/test/CodeGen/Hexagon/vextract-basic.mir
new file mode 100644
index 000000000000..53e7c49b4146
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vextract-basic.mir
@@ -0,0 +1,26 @@
+# RUN: llc -march=hexagon -mattr=+hvx,+hvx-length64b -run-pass hexagon-vextract %s -o - | FileCheck %s
+
+---
+name: fred
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: %r0, %r1, %v0
+    %0:hvxvr = COPY %v0
+    %1:intregs = COPY %r0
+    %2:intregs = COPY %r1
+    %3:intregs = A2_tfrsi 5
+    %4:intregs = V6_extractw %0, %1
+    ; CHECK: %[[A0:[0-9]+]]:intregs = A2_andir %{{[0-9]+}}, -4
+    ; CHECK: L4_loadri_rr %{{[0-9]+}}, %[[A0]], 0
+    %5:intregs = V6_extractw %0, %2
+    ; CHECK: %[[A1:[0-9]+]]:intregs = A2_andir %{{[0-9]+}}, -4
+    ; CHECK: L4_loadri_rr %{{[0-9]+}}, %[[A1]], 0
+    %6:intregs = V6_extractw %0, %3
+    ; Make sure the offset is 4, not 5.
+    ; CHECK: L2_loadri_io %{{[0-9]+}}, 4
+
+...
+
+