forked from OSchip/llvm-project
[AArch64][GlobalISel] Optimize G_BUILD_VECTOR of undef + 1 elt -> SUBREG_TO_REG
This pattern
```
%elt = ... something ...
%undef = G_IMPLICIT_DEF
%vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
```
can be selected to a SUBREG_TO_REG, assuming `%elt` and `%vec` have the same register bank. We don't care about any of the bits in `%vec` aside from those in `%elt`, which just happens to be the 0th element. This is preferable to emitting `mov` instructions for every index. This gives minor code size improvements on the test suite at -Os. Differential Revision: https://reviews.llvm.org/D108773
This commit is contained in:
parent
9b9e7f6f4e
commit
2363a20001
|
@ -164,6 +164,9 @@ private:
|
|||
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
|
||||
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
|
||||
MachineRegisterInfo &MRI);
|
||||
/// Try to select the G_BUILD_VECTOR \p MI as a single SUBREG_TO_REG of its
/// first element. This only applies when every other element is a
/// G_IMPLICIT_DEF and the first element is on the same register bank as the
/// destination vector.
|
||||
/// \returns true if \p MI was replaced by a SUBREG_TO_REG.
|
||||
bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
|
||||
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
|
||||
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
|
||||
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
|
||||
|
@ -4963,6 +4966,47 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Pattern being matched:
  //   %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
  //
  // Only the first element carries a defined value, so the whole
  // G_BUILD_VECTOR is selected as a SUBREG_TO_REG of %elt.
  Register VecReg = I.getOperand(0).getReg();
  Register FirstEltReg = I.getOperand(1).getReg();

  // The destination vector and its first element must be on the same register
  // bank; otherwise this cannot be expressed as a SUBREG_TO_REG.
  const RegisterBank &FirstEltBank = *RBI.getRegBank(FirstEltReg, MRI, TRI);
  const RegisterBank &VecBank = *RBI.getRegBank(VecReg, MRI, TRI);
  if (FirstEltBank != VecBank)
    return false;

  // Every source operand after the first element has to be defined by a
  // G_IMPLICIT_DEF. Bail out on the first one that is not.
  for (const MachineOperand &LaneOp :
       make_range(I.operands_begin() + 2, I.operands_end())) {
    if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, LaneOp.getReg(), MRI))
      return false;
  }

  // Look up a register class for the element and for the full vector, sized
  // by their respective types.
  const TargetRegisterClass *FirstEltRC = getMinClassForRegBank(
      FirstEltBank, MRI.getType(FirstEltReg).getSizeInBits());
  if (!FirstEltRC)
    return false;
  const TargetRegisterClass *VecRC =
      getMinClassForRegBank(VecBank, MRI.getType(VecReg).getSizeInBits());
  if (!VecRC)
    return false;

  // Subregister index that corresponds to the element's register class.
  unsigned SubRegIdx;
  if (!getSubRegForClass(FirstEltRC, TRI, SubRegIdx))
    return false;

  // Emit: %vec = SUBREG_TO_REG 0, %elt, SubRegIdx
  auto NewMI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {VecReg}, {});
  NewMI.addImm(0);
  NewMI.addUse(FirstEltReg);
  NewMI.addImm(SubRegIdx);

  // NOTE(review): I is erased before the constraints below have run, so a
  // failing constraint makes this return false with I already gone. Confirm
  // that callers never touch I again in that case.
  I.eraseFromParent();
  constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI);
  return RBI.constrainGenericRegister(VecReg, *VecRC, MRI) != nullptr;
}
|
||||
|
||||
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
|
||||
MachineRegisterInfo &MRI) {
|
||||
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
|
||||
|
@ -4974,6 +5018,9 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
|
|||
|
||||
if (tryOptConstantBuildVec(I, DstTy, MRI))
|
||||
return true;
|
||||
if (tryOptBuildVecToSubregToReg(I, MRI))
|
||||
return true;
|
||||
|
||||
if (EltSize < 16 || EltSize > 64)
|
||||
return false; // Don't support all element types yet.
|
||||
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
|
||||
|
|
|
@ -228,3 +228,55 @@ body: |
|
|||
$d0 = COPY %1(<8 x s8>)
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: undef_elts_to_subreg_to_reg
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $s0
|
||||
; We have a BUILD_VECTOR whose 0th element is a subregister of the wide
|
||||
; register class. Everything else is undef. This is a SUBREG_TO_REG.
|
||||
|
||||
; CHECK-LABEL: name: undef_elts_to_subreg_to_reg
|
||||
; CHECK: liveins: $s0
|
||||
; CHECK: %val:fpr32 = COPY $s0
|
||||
; CHECK: %bv:fpr128 = SUBREG_TO_REG 0, %val, %subreg.ssub
|
||||
; CHECK: $q0 = COPY %bv
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%val:fpr(s32) = COPY $s0
|
||||
%undef:fpr(s32) = G_IMPLICIT_DEF
|
||||
%bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
|
||||
$q0 = COPY %bv(<4 x s32>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
...
|
||||
---
|
||||
name: undef_elts_different_regbanks
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w0
|
||||
; Element is not a subregister of the wide register class. This is not a
|
||||
; SUBREG_TO_REG.
|
||||
|
||||
; CHECK-LABEL: name: undef_elts_different_regbanks
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %val:gpr32all = COPY $w0
|
||||
; CHECK: %undef:gpr32 = IMPLICIT_DEF
|
||||
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
|
||||
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
|
||||
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %undef
|
||||
; CHECK: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSvi32gpr]], 2, %undef
|
||||
; CHECK: %bv:fpr128 = INSvi32gpr [[INSvi32gpr1]], 3, %undef
|
||||
; CHECK: $q0 = COPY %bv
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%val:gpr(s32) = COPY $w0
|
||||
%undef:gpr(s32) = G_IMPLICIT_DEF
|
||||
%bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
|
||||
$q0 = COPY %bv(<4 x s32>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
|
|
|
@ -572,10 +572,7 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest
|
|||
; GISEL-LABEL: float_vrev64:
|
||||
; GISEL: // %bb.0: // %entry
|
||||
; GISEL-NEXT: movi d0, #0000000000000000
|
||||
; GISEL-NEXT: mov.s v0[1], v0[0]
|
||||
; GISEL-NEXT: mov.s v0[2], v0[0]
|
||||
; GISEL-NEXT: adrp x8, .LCPI28_0
|
||||
; GISEL-NEXT: mov.s v0[3], v0[0]
|
||||
; GISEL-NEXT: ldr q1, [x0]
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0]
|
||||
; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2
|
||||
|
|
Loading…
Reference in New Issue