diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h index f87337bfe16c..b9b366da46e8 100644 --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -46,6 +46,7 @@ FunctionPass *createARMCodeGenPreparePass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); +FunctionPass *createMVEVPTBlockPass(); FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass( std::function Ftor = nullptr); @@ -68,6 +69,7 @@ void initializeARMCodeGenPreparePass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); void initializeThumb2SizeReducePass(PassRegistry &); +void initializeMVEVPTBlockPass(PassRegistry &); } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index ae541208aef6..bd8c5d0b1b66 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -95,6 +95,7 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); + initializeMVEVPTBlockPass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -508,6 +509,7 @@ void ARMPassConfig::addPreSched2() { return !MF.getSubtarget().isThumb1Only(); })); } + addPass(createMVEVPTBlockPass()); addPass(createThumb2ITBlockPass()); } diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index 715ad743ab5c..ea39fb606bee 100644 --- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -316,3 +316,123 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlockPass(); } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "arm-mve-vpt" + +namespace { + class MVEVPTBlock : public MachineFunctionPass { + public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; + + MVEVPTBlock() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } + + private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); + }; + + char MVEVPTBlock::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) + +enum VPTMaskValue { + T = 8, // 0b1000 + TT = 4, // 0b0100 + TE = 12, // 0b1100 + TTT = 2, // 0b0010 + TTE = 6, // 0b0110 + TEE = 10, // 0b1010 + TET = 14, // 0b1110 + TTTT = 1, // 0b0001 + TTTE = 3, // 0b0011 + TTEE = 5, // 0b0101 + TTET = 7, // 0b0111 + TEEE = 9, // 0b1001 + TEET = 11, // 0b1011 + TETT = 13, // 0b1101 + TETE = 15 // 0b1111 +}; + +bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { + bool Modified = false; + MachineBasicBlock::iterator MBIter = Block.begin(); + MachineBasicBlock::iterator EndIter = Block.end(); + + while (MBIter != EndIter) { + MachineInstr *MI = &*MBIter; + unsigned PredReg = 0; + DebugLoc dl = MI->getDebugLoc(); + + ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); + + // The idea of the predicate is that None, Then and Else are for use when + // handling assembly language: they correspond to the three possible + // suffixes "", "t" and "e" on the mnemonic. So when instructions are read + // from assembly source or disassembled from object code, you expect to see + // a mixture whenever there's a long VPT block. But in code generation, we + // hope we'll never generate an Else as input to this pass. + + assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); + + if (Pred == ARMVCC::None) { + ++MBIter; + continue; + } + + MachineInstrBuilder MIBuilder = + BuildMI(Block, MBIter, dl, TII->get(ARM::t2VPST)); + MachineInstr *LastITMI = MI; + MachineBasicBlock::iterator InsertPos = MIBuilder.getInstr(); + + // The mask value for the VPST instruction is T = 0b1000 = 8 + MIBuilder.addImm(VPTMaskValue::T); + + finalizeBundle(Block, InsertPos.getInstrIterator(), + ++LastITMI->getIterator()); + Modified = true; + LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump();); + + ++MBIter; + } + return Modified; +} + +bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { + const ARMSubtarget &STI = + static_cast(Fn.getSubtarget()); + + if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) + return false; + + TII = static_cast(STI.getInstrInfo()); + TRI = STI.getRegisterInfo(); + + LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" + << "********** Function: " << Fn.getName() << '\n'); + + bool Modified = false; + for (MachineBasicBlock &MBB : Fn) + Modified |= InsertVPTBlocks(MBB); + + LLVM_DEBUG(dbgs() << "**************************************\n"); + return Modified; +} + +/// createMVEVPTBlock - Returns an instance of the MVE VPT block +/// insertion pass. +FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 1ffa067cd245..080e8c97835a 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -685,3 +685,28 @@ ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI, return ARMCC::AL; return getInstrPredicate(MI, PredReg); } + +int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + if (!MCID.OpInfo) + return -1; + + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) + if (ARM::isVpred(MCID.OpInfo[i].OperandType)) + return i; + + return -1; +} + +ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI, + unsigned &PredReg) { + int PIdx = findFirstVPTPredOperandIdx(MI); + if (PIdx == -1) { + PredReg = 0; + return ARMVCC::None; + } + + PredReg = MI.getOperand(PIdx+1).getReg(); + return (ARMVCC::VPTCodes)MI.getOperand(PIdx).getImm(); +} diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index c5aca82acc26..a6712d5a0e72 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -68,6 +68,12 @@ private: /// to llvm::getInstrPredicate except it returns AL for conditional branch /// instructions which are "predicated", but are not in IT blocks. ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg); + +// getVPTInstrPredicate: VPT analogue of that, plus a helper function +// corresponding to MachineInstr::findFirstPredOperandIdx. +int findFirstVPTPredOperandIdx(const MachineInstr &MI); +ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, + unsigned &PredReg); } #endif diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 501877fb3f72..f9a4e712450d 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -125,6 +125,7 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: If Converter +; CHECK-NEXT: MVE VPT block insertion pass ; CHECK-NEXT: Thumb IT blocks insertion pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction diff --git a/llvm/test/CodeGen/ARM/mve-vpt-block.mir b/llvm/test/CodeGen/ARM/mve-vpt-block.mir new file mode 100644 index 000000000000..03b921bf75fb --- /dev/null +++ b/llvm/test/CodeGen/ARM/mve-vpt-block.mir @@ -0,0 +1,71 @@ +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { + entry: + %conv.i = zext i16 %p to i32 + %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2 + ret <4 x float> %0 + } + + declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1 + + attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + + +... +--- +name: test_vminnmq_m_f32_v2 +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +constants: [] +body: | + bb.0.entry: + liveins: $q0, $q1, $q2, $r0 + + ; CHECK: VPST 8, implicit-def $p0 + ; CHECK-NEXT: $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0 + + $vpr = VMSR_P0 killed $r0, 14, $noreg + renamable $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0 + tBX_RET 14, $noreg, implicit $q0 + +...