forked from OSchip/llvm-project
[GlobalISel][AArch64] Add selection support for G_EXTRACT_VECTOR_ELT
This adds instruction selection support for G_EXTRACT_VECTOR_ELT for cases where the index is defined by a G_CONSTANT. It also factors out the lane copy opcode selection part into its own function, `getLaneCopyOpcode`. This is used by both `selectUnmergeValues` and `selectExtractElt`. Differential Revision: https://reviews.llvm.org/D58469 llvm-svn: 355344
This commit is contained in:
parent
4737abc71c
commit
caf62b1d47
|
@ -78,6 +78,7 @@ private:
|
|||
void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
|
||||
SmallVectorImpl<int> &Idxs) const;
|
||||
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
|
||||
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
|
||||
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
|
||||
|
@ -1709,6 +1710,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
|
|||
return selectUnmergeValues(I, MRI);
|
||||
case TargetOpcode::G_SHUFFLE_VECTOR:
|
||||
return selectShuffleVector(I, MRI);
|
||||
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
||||
return selectExtractElt(I, MRI);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -1787,6 +1790,127 @@ bool AArch64InstructionSelector::selectMergeValues(
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
|
||||
const unsigned EltSize) {
|
||||
// Choose a lane copy opcode and subregister based off of the size of the
|
||||
// vector's elements.
|
||||
switch (EltSize) {
|
||||
case 16:
|
||||
CopyOpc = AArch64::CPYi16;
|
||||
ExtractSubReg = AArch64::hsub;
|
||||
break;
|
||||
case 32:
|
||||
CopyOpc = AArch64::CPYi32;
|
||||
ExtractSubReg = AArch64::ssub;
|
||||
break;
|
||||
case 64:
|
||||
CopyOpc = AArch64::CPYi64;
|
||||
ExtractSubReg = AArch64::dsub;
|
||||
break;
|
||||
default:
|
||||
// Unknown size, bail out.
|
||||
LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Select a G_EXTRACT_VECTOR_ELT whose lane index is defined by a G_CONSTANT
/// (possibly reached through a chain of COPYs of virtual registers) into a
/// lane copy instruction.
///
/// \param I   The G_EXTRACT_VECTOR_ELT to select. Erased on success.
/// \param MRI Register info used to look up types, banks and definitions.
///
/// \returns true if \p I was replaced by a lane copy. Returns false when the
/// destination is not on the FPR bank, when the index cannot be traced back to
/// a G_CONSTANT, when no lane copy opcode or register class can be determined,
/// or when widening the source to 128 bits fails.
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const unsigned SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);

  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");

  // The lane index becomes the immediate operand of the lane copy, so it has
  // to be resolved to a constant below.
  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  // Look the destination bank up once; it is needed both for the FPR check
  // and for choosing the destination register class.
  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB.getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the instruction that defines the constant to extract from. There could
  // be any number of copies between the instruction and the definition of the
  // index. Skip them.
  MachineInstr *LaneDefInst = MRI.getVRegDef(LaneIdxOp.getReg());
  while (LaneDefInst && LaneDefInst->isCopy()) {
    const unsigned CopySrcReg = LaneDefInst->getOperand(1).getReg();

    // getVRegDef is only meaningful for virtual registers in SSA form. A
    // physical register has no unique definition, so following it could land
    // on an unrelated constant and select the wrong lane. Give up instead.
    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
      LLVM_DEBUG(dbgs() << "Lane index is copied from a non-virtual register; "
                           "cannot look through it.\n");
      return false;
    }
    LaneDefInst = MRI.getVRegDef(CopySrcReg);
  }

  // Did we find a def in the first place? If not, bail.
  if (!LaneDefInst) {
    LLVM_DEBUG(dbgs() << "Did not find VReg definition for " << LaneIdxOp
                      << "\n");
    return false;
  }

  // TODO: Handle extracts that don't use G_CONSTANT.
  if (LaneDefInst->getOpcode() != TargetOpcode::G_CONSTANT) {
    LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
                         "currently unsupported.\n");
    return false;
  }

  // NOTE(review): LaneIdx is not checked against the number of elements in
  // WideTy, so an out-of-range constant index is passed straight through to
  // the lane copy. Confirm whether that should be rejected here.
  unsigned LaneIdx = LaneDefInst->getOperand(1).getCImm()->getLimitedValue();

  // Only the opcode is used below; the subregister index is an output the
  // shared helper always fills in.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return false;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return false;
  }

  // The source class itself is not used further, but failing to find one
  // means we do not know how to handle this source type on its bank.
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *SrcRC =
      getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true);
  if (!SrcRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return false;
  }

  // The register that the lane copy will read from.
  unsigned InsertReg = SrcReg;
  MachineIRBuilder MIRBuilder(I);

  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (WideTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVector)
      return false;
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);

  // Make sure the destination ends up in the class chosen for its type and
  // bank.
  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);

  I.eraseFromParent();
  return true;
}
|
||||
|
||||
bool AArch64InstructionSelector::selectUnmergeValues(
|
||||
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
||||
assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
|
||||
|
@ -1823,24 +1947,8 @@ bool AArch64InstructionSelector::selectUnmergeValues(
|
|||
// vector's elements.
|
||||
unsigned CopyOpc = 0;
|
||||
unsigned ExtractSubReg = 0;
|
||||
switch (NarrowTy.getSizeInBits()) {
|
||||
case 16:
|
||||
CopyOpc = AArch64::CPYi16;
|
||||
ExtractSubReg = AArch64::hsub;
|
||||
break;
|
||||
case 32:
|
||||
CopyOpc = AArch64::CPYi32;
|
||||
ExtractSubReg = AArch64::ssub;
|
||||
break;
|
||||
case 64:
|
||||
CopyOpc = AArch64::CPYi64;
|
||||
ExtractSubReg = AArch64::dsub;
|
||||
break;
|
||||
default:
|
||||
// Unknown size, bail out.
|
||||
LLVM_DEBUG(dbgs() << "NarrowTy had unsupported size.\n");
|
||||
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set up for the lane copies.
|
||||
MachineBasicBlock &MBB = *I.getParent();
|
||||
|
|
|
@ -441,7 +441,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
|
|||
.minScalar(2, s64)
|
||||
.legalIf([=](const LegalityQuery &Query) {
|
||||
const LLT &VecTy = Query.Types[1];
|
||||
return VecTy == v4s32 || VecTy == v2s64;
|
||||
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v4s32 ||
|
||||
VecTy == v2s64 || VecTy == v2s32;
|
||||
});
|
||||
|
||||
getActionDefinitionsBuilder(G_BUILD_VECTOR)
|
||||
|
|
|
@ -689,6 +689,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
||||
// Destination and source need to be FPRs.
|
||||
OpRegBankIdx[0] = PMI_FirstFPR;
|
||||
OpRegBankIdx[1] = PMI_FirstFPR;
|
||||
|
||||
// Index needs to be a GPR.
|
||||
OpRegBankIdx[2] = PMI_FirstGPR;
|
||||
break;
|
||||
|
||||
case TargetOpcode::G_BUILD_VECTOR:
|
||||
// If the first source operand belongs to a FPR register bank, then make
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s
|
||||
|
||||
name: v2s32_fpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $d0
|
||||
|
||||
%0:_(<2 x s32>) = COPY $d0
|
||||
%2:_(s64) = G_CONSTANT i64 1
|
||||
%1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %2(s64)
|
||||
$s0 = COPY %1(s32)
|
||||
RET_ReallyLR implicit $s0
|
||||
|
||||
...
|
||||
---
|
||||
name: v4s32_gpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: v4s32_gpr
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0
|
||||
; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
|
||||
; CHECK: [[EVEC:%[0-9]+]]:fpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
|
||||
; CHECK: $s0 = COPY [[EVEC]](s32)
|
||||
; CHECK: RET_ReallyLR implicit $s0
|
||||
%0:_(<4 x s32>) = COPY $q0
|
||||
%2:_(s64) = G_CONSTANT i64 0
|
||||
%1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %2(s64)
|
||||
$s0 = COPY %1(s32)
|
||||
RET_ReallyLR implicit $s0
|
||||
|
||||
...
|
||||
---
|
||||
name: v2s64_fpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: v2s64_fpr
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
|
||||
; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 2
|
||||
; CHECK: [[EVEC:%[0-9]+]]:fpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
|
||||
; CHECK: $d0 = COPY [[EVEC]](s64)
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%0:_(<2 x s64>) = COPY $q0
|
||||
%2:_(s64) = G_CONSTANT i64 2
|
||||
%1:_(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %2(s64)
|
||||
$d0 = COPY %1(s64)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: v4s16_fpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $d0
|
||||
|
||||
; CHECK-LABEL: name: v4s16_fpr
|
||||
; CHECK: liveins: $d0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0
|
||||
; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64)
|
||||
; CHECK: $h0 = COPY [[EVEC]](s16)
|
||||
; CHECK: RET_ReallyLR implicit $h0
|
||||
%0:_(<4 x s16>) = COPY $d0
|
||||
%2:_(s64) = G_CONSTANT i64 1
|
||||
%1:_(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %2(s64)
|
||||
$h0 = COPY %1(s16)
|
||||
RET_ReallyLR implicit $h0
|
||||
|
||||
...
|
|
@ -0,0 +1,94 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=instruction-select %s -o - | FileCheck %s
|
||||
...
|
||||
---
|
||||
name: v2s32_fpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: fpr }
|
||||
- { id: 1, class: fpr }
|
||||
- { id: 2, class: gpr }
|
||||
- { id: 3, class: fpr }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0
|
||||
|
||||
; CHECK-LABEL: name: v2s32_fpr
|
||||
; CHECK: liveins: $d0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
|
||||
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
|
||||
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
|
||||
; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
|
||||
; CHECK: $s0 = COPY [[CPYi32_]]
|
||||
; CHECK: RET_ReallyLR implicit $s0
|
||||
%0:fpr(<2 x s32>) = COPY $d0
|
||||
%2:gpr(s64) = G_CONSTANT i64 1
|
||||
%3:fpr(s64) = COPY %2(s64)
|
||||
%1:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
|
||||
$s0 = COPY %1(s32)
|
||||
RET_ReallyLR implicit $s0
|
||||
|
||||
...
|
||||
---
|
||||
name: v2s64_fpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: fpr }
|
||||
- { id: 1, class: fpr }
|
||||
- { id: 2, class: gpr }
|
||||
- { id: 3, class: fpr }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: v2s64_fpr
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 2
|
||||
; CHECK: $d0 = COPY [[CPYi64_]]
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%0:fpr(<2 x s64>) = COPY $q0
|
||||
%2:gpr(s64) = G_CONSTANT i64 2
|
||||
%3:fpr(s64) = COPY %2(s64)
|
||||
%1:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %3(s64)
|
||||
$d0 = COPY %1(s64)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: v4s16_fpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: fpr }
|
||||
- { id: 1, class: fpr }
|
||||
- { id: 2, class: gpr }
|
||||
- { id: 3, class: fpr }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0
|
||||
|
||||
; CHECK-LABEL: name: v4s16_fpr
|
||||
; CHECK: liveins: $d0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
|
||||
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
|
||||
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
|
||||
; CHECK: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1
|
||||
; CHECK: $h0 = COPY [[CPYi16_]]
|
||||
; CHECK: RET_ReallyLR implicit $h0
|
||||
%0:fpr(<4 x s16>) = COPY $d0
|
||||
%2:gpr(s64) = G_CONSTANT i64 1
|
||||
%3:fpr(s64) = COPY %2(s64)
|
||||
%1:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %3(s64)
|
||||
$h0 = COPY %1(s16)
|
||||
RET_ReallyLR implicit $h0
|
||||
|
||||
...
|
Loading…
Reference in New Issue