forked from OSchip/llvm-project
[ARM] MVE VPT Block Pass
Initial commit of a new pass to create vector predication blocks, called VPT blocks, that are supported by the Armv8.1-M MVE architecture.

This is a first, naive implementation. I.e., for 2 consecutive predicated instructions I1 and I2, for example, it will generate 2 VPT blocks:

VPST
I1
VPST
I2

A more optimal implementation would obviously put instructions in the same VPT block when they are predicated on the same condition and when it is allowed to do this:

VPTT
I1
I2

We will address this optimisation with follow-up patches when the groundwork is in.

Creating VPT blocks is very similar to creating IT blocks, which is the reason I added this to Thumb2ITBlocks.cpp. This allows reuse of the def-use analysis that we need for the more optimal implementation.

VPT blocks cannot be nested in IT blocks, and vice versa, and so these 2 passes cannot interact with each other. Instructions allowed in VPT blocks must be MVE instructions that are marked as VPT compatible.

Differential Revision: https://reviews.llvm.org/D63247
llvm-svn: 363370
This commit is contained in:
parent
0f15ba98f5
commit
3058a62b90
|
@ -46,6 +46,7 @@ FunctionPass *createARMCodeGenPreparePass();
|
|||
FunctionPass *createARMConstantIslandPass();
|
||||
FunctionPass *createMLxExpansionPass();
|
||||
FunctionPass *createThumb2ITBlockPass();
|
||||
FunctionPass *createMVEVPTBlockPass();
|
||||
FunctionPass *createARMOptimizeBarriersPass();
|
||||
FunctionPass *createThumb2SizeReductionPass(
|
||||
std::function<bool(const Function &)> Ftor = nullptr);
|
||||
|
@ -68,6 +69,7 @@ void initializeARMCodeGenPreparePass(PassRegistry &);
|
|||
void initializeARMConstantIslandsPass(PassRegistry &);
|
||||
void initializeARMExpandPseudoPass(PassRegistry &);
|
||||
void initializeThumb2SizeReducePass(PassRegistry &);
|
||||
void initializeMVEVPTBlockPass(PassRegistry &);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ extern "C" void LLVMInitializeARMTarget() {
|
|||
initializeARMExecutionDomainFixPass(Registry);
|
||||
initializeARMExpandPseudoPass(Registry);
|
||||
initializeThumb2SizeReducePass(Registry);
|
||||
initializeMVEVPTBlockPass(Registry);
|
||||
}
|
||||
|
||||
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
|
||||
|
@ -508,6 +509,7 @@ void ARMPassConfig::addPreSched2() {
|
|||
return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
|
||||
}));
|
||||
}
|
||||
addPass(createMVEVPTBlockPass());
|
||||
addPass(createThumb2ITBlockPass());
|
||||
}
|
||||
|
||||
|
|
|
@ -316,3 +316,123 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
|
|||
FunctionPass *llvm::createThumb2ITBlockPass() {
|
||||
return new Thumb2ITBlockPass();
|
||||
}
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "arm-mve-vpt"
|
||||
|
||||
namespace {
|
||||
class MVEVPTBlock : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
const Thumb2InstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
|
||||
MVEVPTBlock() : MachineFunctionPass(ID) {}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &Fn) override;
|
||||
|
||||
MachineFunctionProperties getRequiredProperties() const override {
|
||||
return MachineFunctionProperties().set(
|
||||
MachineFunctionProperties::Property::NoVRegs);
|
||||
}
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "MVE VPT block insertion pass";
|
||||
}
|
||||
|
||||
private:
|
||||
bool InsertVPTBlocks(MachineBasicBlock &MBB);
|
||||
};
|
||||
|
||||
char MVEVPTBlock::ID = 0;
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
// Register MVEVPTBlock with the pass infrastructure. DEBUG_TYPE is passed as
// the second argument and "ARM MVE VPT block pass" as the third.
// NOTE(review): the second argument is presumably the pass's command-line
// name and the two trailing `false` flags the macro's CFG-only / is-analysis
// arguments -- confirm against the INITIALIZE_PASS definition.
INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
|
||||
|
||||
/// 4-bit mask immediates for VPT/VPST instructions.
///
/// Each enumerator name spells out the per-instruction pattern of the block
/// it encodes (presumably T = Then, E = Else), covering blocks of one to four
/// instructions. The trailing comment on each line shows the same value in
/// binary.
enum VPTMaskValue {
  // Blocks of one instruction.
  T = 0x8, // 0b1000

  // Blocks of two instructions.
  TT = 0x4, // 0b0100
  TE = 0xC, // 0b1100

  // Blocks of three instructions.
  TTT = 0x2, // 0b0010
  TTE = 0x6, // 0b0110
  TEE = 0xA, // 0b1010
  TET = 0xE, // 0b1110

  // Blocks of four instructions.
  TTTT = 0x1, // 0b0001
  TTTE = 0x3, // 0b0011
  TTEE = 0x5, // 0b0101
  TTET = 0x7, // 0b0111
  TEEE = 0x9, // 0b1001
  TEET = 0xB, // 0b1011
  TETT = 0xD, // 0b1101
  TETE = 0xF  // 0b1111
};
|
||||
|
||||
/// Walk \p Block and give every VPT-predicated instruction its own
/// single-instruction VPT block.
///
/// For each instruction whose VPT predicate is not ARMVCC::None, a t2VPST
/// carrying the mask VPTMaskValue::T is inserted directly in front of it and
/// the two instructions are bundled together. Instructions without a VPT
/// predicate are left untouched.
///
/// \returns true if at least one VPST was inserted.
bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
  bool Changed = false;

  for (MachineBasicBlock::iterator It = Block.begin(), End = Block.end();
       It != End; ++It) {
    MachineInstr *Predicated = &*It;

    // PredReg only serves as the out-parameter of the predicate query.
    unsigned PredReg = 0;
    const ARMVCC::VPTCodes Code = getVPTInstrPredicate(*Predicated, PredReg);

    // None, Then and Else exist for the benefit of assembly handling: they
    // map onto the mnemonic suffixes "", "t" and "e". Assembled or
    // disassembled code with long VPT blocks will contain a mix of them, but
    // code generation is not expected to hand an Else to this pass.
    assert(Code != ARMVCC::Else && "VPT block pass does not expect Else preds");

    if (Code == ARMVCC::None)
      continue;

    // Emit the VPST immediately before the predicated instruction. Its mask
    // operand is T = 0b1000 = 8.
    MachineInstrBuilder VPST = BuildMI(Block, It, Predicated->getDebugLoc(),
                                       TII->get(ARM::t2VPST));
    VPST.addImm(VPTMaskValue::T);

    // Bundle the half-open range [VPST, instruction after Predicated).
    MachineBasicBlock::instr_iterator BundleBegin =
        VPST.getInstr()->getIterator();
    MachineBasicBlock::instr_iterator BundleEnd = Predicated->getIterator();
    ++BundleEnd;
    finalizeBundle(Block, BundleBegin, BundleEnd);

    Changed = true;
    LLVM_DEBUG(dbgs() << "VPT block created for: "; Predicated->dump(););
  }

  return Changed;
}
|
||||
|
||||
/// Run VPT block insertion over every basic block of \p Fn.
///
/// Nothing is done unless the subtarget is Thumb2 and has MVE integer
/// operations. Otherwise the instruction and register info are cached in
/// TII / TRI and InsertVPTBlocks() is applied to each block in turn.
///
/// \returns true if any basic block was modified.
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
  const ARMSubtarget &Subtarget = Fn.getSubtarget<ARMSubtarget>();

  // VPT blocks only exist on Thumb2 targets with MVE.
  if (!(Subtarget.isThumb2() && Subtarget.hasMVEIntegerOps()))
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(Subtarget.getInstrInfo());
  TRI = Subtarget.getRegisterInfo();

  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
                    << "********** Function: " << Fn.getName() << '\n');

  // Every block is visited, whether or not an earlier one already changed.
  bool Changed = false;
  for (MachineBasicBlock &MBB : Fn) {
    if (InsertVPTBlocks(MBB))
      Changed = true;
  }

  LLVM_DEBUG(dbgs() << "**************************************\n");
  return Changed;
}
|
||||
|
||||
/// createMVEVPTBlock - Returns an instance of the MVE VPT block
|
||||
/// insertion pass.
|
||||
FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
|
||||
|
|
|
@ -685,3 +685,28 @@ ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
|
|||
return ARMCC::AL;
|
||||
return getInstrPredicate(MI, PredReg);
|
||||
}
|
||||
|
||||
int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
|
||||
const MCInstrDesc &MCID = MI.getDesc();
|
||||
|
||||
if (!MCID.OpInfo)
|
||||
return -1;
|
||||
|
||||
for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
|
||||
if (ARM::isVpred(MCID.OpInfo[i].OperandType))
|
||||
return i;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/// Query the VPT predicate of \p MI.
///
/// If \p MI has no VPT predicate operand, \p PredReg is set to 0 and
/// ARMVCC::None is returned. Otherwise \p PredReg receives the register held
/// by the operand following the first VPT predicate operand, and the
/// immediate of that first predicate operand is returned as the predicate
/// code.
ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
                                            unsigned &PredReg) {
  const int FirstPredOp = findFirstVPTPredOperandIdx(MI);

  // Not a VPT-predicable instruction.
  if (FirstPredOp < 0) {
    PredReg = 0;
    return ARMVCC::None;
  }

  // The predicate code immediate is followed by the predicate register.
  PredReg = MI.getOperand(FirstPredOp + 1).getReg();
  const auto Code = MI.getOperand(FirstPredOp).getImm();
  return static_cast<ARMVCC::VPTCodes>(Code);
}
|
||||
|
|
|
@ -68,6 +68,12 @@ private:
|
|||
/// to llvm::getInstrPredicate except it returns AL for conditional branch
|
||||
/// instructions which are "predicated", but are not in IT blocks.
|
||||
ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
|
||||
|
||||
// getVPTInstrPredicate: VPT analogue of that, plus a helper function
|
||||
// corresponding to MachineInstr::findFirstPredOperandIdx.
|
||||
int findFirstVPTPredOperandIdx(const MachineInstr &MI);
|
||||
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
|
||||
unsigned &PredReg);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -125,6 +125,7 @@
|
|||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: If Converter
|
||||
; CHECK-NEXT: MVE VPT block insertion pass
|
||||
; CHECK-NEXT: Thumb IT blocks insertion pass
|
||||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8.1m.main-arm-none-eabi"
|
||||
|
||||
define hidden arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32_v2(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%conv.i = zext i16 %p to i32
|
||||
%0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2
|
||||
ret <4 x float> %0
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
|
||||
...
|
||||
---
|
||||
name: test_vminnmq_m_f32_v2
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$q0', virtual-reg: '' }
|
||||
- { reg: '$q1', virtual-reg: '' }
|
||||
- { reg: '$q2', virtual-reg: '' }
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
constants: []
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $q0, $q1, $q2, $r0
|
||||
|
||||
; CHECK: VPST 8, implicit-def $p0
|
||||
; CHECK-NEXT: $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0
|
||||
|
||||
$vpr = VMSR_P0 killed $r0, 14, $noreg
|
||||
renamable $q0 = nnan ninf nsz VMINNMf32 killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, killed renamable $q0
|
||||
tBX_RET 14, $noreg, implicit $q0
|
||||
|
||||
...
|
Loading…
Reference in New Issue