[AArch64][GlobalISel] Optimize G_BUILD_VECTOR of undef + 1 elt -> SUBREG_TO_REG

This pattern

```
%elt = ... something ...
%undef = G_IMPLICIT_DEF
%vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
```

Can be selected to a SUBREG_TO_REG, assuming `%elt` and `%vec` have the same
register bank. We don't care about any of the bits in `%vec` aside from those
in `%elt`, which just happens to be the 0th element.

This is preferable to emitting `mov` instructions for every index.

This gives minor code size improvements on the test suite at -Os.

Differential Revision: https://reviews.llvm.org/D108773
This commit is contained in:
Jessica Paquette 2021-08-26 11:04:17 -07:00
parent 9b9e7f6f4e
commit 2363a20001
3 changed files with 99 additions and 3 deletions

View File

@ -164,6 +164,9 @@ private:
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
/// Try to select the G_BUILD_VECTOR \p MI as a single SUBREG_TO_REG of its
/// first element. This applies when every element other than the first is a
/// G_IMPLICIT_DEF and the first element is on the same register bank as the
/// destination vector.
///
/// \returns true if \p MI was replaced by a SUBREG_TO_REG and the destination
/// register was successfully constrained. Returns false, leaving \p MI in
/// place, when the register banks differ, when any trailing element is not a
/// G_IMPLICIT_DEF, or when no register class / subregister index is found.
bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
@ -4963,6 +4966,47 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return true;
}
bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Pattern being matched:
  //   %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
  //
  // Only element 0 carries a meaningful value, so %vec can be produced by a
  // single SUBREG_TO_REG of %elt rather than by an insert for every index.
  const Register VecReg = I.getOperand(0).getReg();
  const Register FirstEltReg = I.getOperand(1).getReg();

  // The vector must live on the same register bank as its first element;
  // otherwise this cannot be expressed as a SUBREG_TO_REG.
  const RegisterBank &FirstEltBank = *RBI.getRegBank(FirstEltReg, MRI, TRI);
  const RegisterBank &VecBank = *RBI.getRegBank(VecReg, MRI, TRI);
  if (FirstEltBank != VecBank)
    return false;

  // Every operand after the first element (operand index 2 onwards) has to be
  // defined by a G_IMPLICIT_DEF.
  for (const MachineOperand &Lane :
       make_range(I.operands_begin() + 2, I.operands_end())) {
    if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Lane.getReg(), MRI))
      return false;
  }

  // Look up register classes for the element and for the whole vector, based
  // on their bank and size in bits. Bail out if either lookup fails.
  const TargetRegisterClass *FirstEltRC = getMinClassForRegBank(
      FirstEltBank, MRI.getType(FirstEltReg).getSizeInBits());
  if (!FirstEltRC)
    return false;
  const TargetRegisterClass *VecRC =
      getMinClassForRegBank(VecBank, MRI.getType(VecReg).getSizeInBits());
  if (!VecRC)
    return false;

  // Subregister index to use for the element's class (written by
  // getSubRegForClass on success).
  unsigned SubRegIdx = 0;
  if (!getSubRegForClass(FirstEltRC, TRI, SubRegIdx))
    return false;

  // Emit: %vec = SUBREG_TO_REG 0, %elt, SubRegIdx
  auto NewMI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {VecReg}, {});
  NewMI.addImm(0);
  NewMI.addUse(FirstEltReg);
  NewMI.addImm(SubRegIdx);

  // The generic instruction is no longer needed once its replacement exists.
  I.eraseFromParent();

  // NOTE(review): the result of constrainSelectedInstRegOperands is not
  // checked here; only the constraint on the destination register decides the
  // return value.
  constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI);
  return RBI.constrainGenericRegister(VecReg, *VecRC, MRI);
}
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@ -4974,6 +5018,9 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (tryOptConstantBuildVec(I, DstTy, MRI))
return true;
if (tryOptBuildVecToSubregToReg(I, MRI))
return true;
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

View File

@ -228,3 +228,55 @@ body: |
$d0 = COPY %1(<8 x s8>)
RET_ReallyLR
...
---
name: undef_elts_to_subreg_to_reg
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $s0
; We have a BUILD_VECTOR whose 0th element is a subregister of the wide
; register class. Everything else is undef. This is a SUBREG_TO_REG.
; CHECK-LABEL: name: undef_elts_to_subreg_to_reg
; CHECK: liveins: $s0
; CHECK: %val:fpr32 = COPY $s0
; CHECK: %bv:fpr128 = SUBREG_TO_REG 0, %val, %subreg.ssub
; CHECK: $q0 = COPY %bv
; CHECK: RET_ReallyLR implicit $q0
%val:fpr(s32) = COPY $s0
%undef:fpr(s32) = G_IMPLICIT_DEF
%bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
$q0 = COPY %bv(<4 x s32>)
RET_ReallyLR implicit $q0
...
...
---
name: undef_elts_different_regbanks
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $w0
; The element is on the gpr bank while the vector is on the fpr bank, so the
; element is not a subregister of the wide register class. This is not a
; SUBREG_TO_REG.
; CHECK-LABEL: name: undef_elts_different_regbanks
; CHECK: liveins: $w0
; CHECK: %val:gpr32all = COPY $w0
; CHECK: %undef:gpr32 = IMPLICIT_DEF
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %undef
; CHECK: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSvi32gpr]], 2, %undef
; CHECK: %bv:fpr128 = INSvi32gpr [[INSvi32gpr1]], 3, %undef
; CHECK: $q0 = COPY %bv
; CHECK: RET_ReallyLR implicit $q0
%val:gpr(s32) = COPY $w0
%undef:gpr(s32) = G_IMPLICIT_DEF
%bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
$q0 = COPY %bv(<4 x s32>)
RET_ReallyLR implicit $q0
...

View File

@ -572,10 +572,7 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest
; GISEL-LABEL: float_vrev64:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: movi d0, #0000000000000000
; GISEL-NEXT: mov.s v0[1], v0[0]
; GISEL-NEXT: mov.s v0[2], v0[0]
; GISEL-NEXT: adrp x8, .LCPI28_0
; GISEL-NEXT: mov.s v0[3], v0[0]
; GISEL-NEXT: ldr q1, [x0]
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0]
; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2