[AArch64][SVE] Add support for spilling/filling ZPR2/3/4

Summary:
This patch enables the register allocator to spill/fill lists of 2, 3
and 4 SVE vector registers to/from the stack. This is implemented with
pseudo instructions that get expanded to individual LDR_ZXI/STR_ZXI
instructions in AArch64ExpandPseudoInsts.

Patch by Sander de Smalen.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D75988
Commit 8a397b66b2 (parent c010d4d195), committed by Cullen Rhodes on 2020-05-28 09:37:55 +00:00.
4 changed files with 236 additions and 20 deletions.


@@ -80,6 +80,9 @@ private:
bool expandSetTagLoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandSVESpillFill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opc,
unsigned N);
};
} // end anonymous namespace
@@ -595,6 +598,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

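As a reading aid, here is a minimal standalone C++ sketch (not LLVM code; the struct and function names are invented for illustration) of the bookkeeping the loop above performs: each of the N vectors in the tuple is addressed at the base register plus a consecutive vector-sized slot, and only the final expanded instruction inherits the kill flag on the base register.

#include <cassert>
#include <vector>

// Hypothetical summary of one expanded LDR_ZXI/STR_ZXI.
struct ExpandedVectorMemOp {
  unsigned SubRegIdx; // zsub0 + Offset in the real expansion
  int ImmOffset;      // pseudo's immediate + Offset, in vector-sized slots
  bool KillBase;      // base-register kill flag, only on the last access
};

// Sketch of the per-register bookkeeping done by expandSVESpillFill.
std::vector<ExpandedVectorMemOp>
expandTupleSpillFill(int BaseImm, unsigned N, bool BaseIsKilled) {
  std::vector<ExpandedVectorMemOp> Ops;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = BaseImm + static_cast<int>(Offset);
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    Ops.push_back({Offset, ImmOffset, BaseIsKilled && Offset + 1 == N});
  }
  return Ops;
}

So a STR_ZZZXI of a three-vector tuple at immediate 0 becomes three STR_ZXI stores at offsets 0, 1 and 2, which is exactly what the EXPAND lines in the MIR test below check.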
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -970,6 +995,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
report_fatal_error(
"Non-writeback variants of STGloop / STZGloop should not "
"survive past PrologEpilogInserter.");
case AArch64::STR_ZZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
case AArch64::STR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
case AArch64::STR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
case AArch64::LDR_ZZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
case AArch64::LDR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
}
return false;
}


@@ -2278,6 +2278,27 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector * 4;
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector * 3;
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector * 2;
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::Scalable(2);
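The staggered MaxOffset values above (252/253/254, versus 255 for a single ZPR) follow directly from the expansion: the pseudo's immediate plus N-1 must still fit in the single-register LDR_ZXI/STR_ZXI range of [-256, 255]. A small sketch of that arithmetic, with an illustrative helper name:

#include <cassert>
#include <utility>

// Illustrative only: derive the immediate range of an N-register SVE
// spill/fill pseudo from the single-register LDR_ZXI/STR_ZXI range.
std::pair<int, int> sveTupleImmRange(unsigned N) {
  const int SingleMin = -256, SingleMax = 255; // LDR_ZXI/STR_ZXI immediates
  assert(N >= 1 && N <= 4 && "ZPR tuples hold at most four vectors");
  // The last expanded access uses BaseImm + (N - 1), so the upper bound
  // shrinks by N - 1 while the lower bound is unchanged.
  return {SingleMin, SingleMax - static_cast<int>(N - 1)};
}

// sveTupleImmRange(2) == {-256, 254}
// sveTupleImmRange(3) == {-256, 253}
// sveTupleImmRange(4) == {-256, 252}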
@@ -2984,6 +3005,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -2992,6 +3014,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_PXI;
StackID = TargetStackID::SVEVector;
}
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3031,6 +3058,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
get(AArch64::STPXi), SrcReg, isKill,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -3049,6 +3080,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZXI;
StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -3056,6 +3091,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZXI;
StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -3063,19 +3102,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZZXI;
StackID = TargetStackID::SVEVector;
}
break;
}
unsigned StackID = TargetStackID::Default;
if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_PXI;
StackID = TargetStackID::SVEVector;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::SVEVector;
}
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
@@ -3126,6 +3159,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
unsigned Opc = 0;
bool Offset = true;
unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -3134,6 +3168,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_PXI;
StackID = TargetStackID::SVEVector;
}
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3173,6 +3212,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
get(AArch64::LDPXi), DestReg, AArch64::sube64,
AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -3191,6 +3234,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZXI;
StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -3198,6 +3245,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZXI;
StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -3205,20 +3256,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZZXI;
StackID = TargetStackID::SVEVector;
}
break;
}
unsigned StackID = TargetStackID::Default;
if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_PXI;
StackID = TargetStackID::SVEVector;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::SVEVector;
}
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
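Taken together, storeRegToStackSlot and loadRegFromStackSlot now select the SVE opcode and the scalable sve-vec stack ID straight from the spill size of the register class. A condensed sketch of the mapping added here (plain data for illustration; the class and opcode names come from the diff above, the table itself is not LLVM code):

// Spill sizes are in bytes for the minimum (128-bit) vector length,
// i.e. what TRI->getSpillSize(RC) reports for these classes.
struct SVESpillFillMapping {
  const char *RegClass;
  unsigned SpillSize;
  const char *StoreOpc;
  const char *LoadOpc; // all of these use the sve-vec stack ID
};

static const SVESpillFillMapping SVESpillFillTable[] = {
    {"PPR", 2, "STR_PXI", "LDR_PXI"},
    {"ZPR", 16, "STR_ZXI", "LDR_ZXI"},
    {"ZPR2", 32, "STR_ZZXI", "LDR_ZZXI"},
    {"ZPR3", 48, "STR_ZZZXI", "LDR_ZZZXI"},
    {"ZPR4", 64, "STR_ZZZZXI", "LDR_ZZZZXI"},
};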


@@ -1334,6 +1334,20 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
// Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4.
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
// AArch64ExpandPseudoInsts.
let mayLoad = 1, hasSideEffects = 0 in {
def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let mayStore = 1, hasSideEffects = 0 in {
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),


@@ -8,6 +8,9 @@
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
attributes #0 = { nounwind "target-features"="+sve" }
@@ -90,3 +93,120 @@ body: |
$z0 = COPY %0
RET_ReallyLR
...
---
name: spills_fills_stack_id_zpr2
tracksRegLiveness: true
registers:
- { id: 0, class: zpr2 }
stack:
liveins:
- { reg: '$z0_z1', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $z0_z1
; CHECK-LABEL: name: spills_fills_stack_id_zpr2
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
; CHECK-NEXT: stack-id: sve-vec
; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
; EXPAND: STR_ZXI $z0, $sp, 0
; EXPAND: STR_ZXI $z1, $sp, 1
; EXPAND: $z0 = LDR_ZXI $sp, 0
; EXPAND: $z1 = LDR_ZXI $sp, 1
%0:zpr2 = COPY $z0_z1
$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
$z8_z9_z10_z11 = IMPLICIT_DEF
$z12_z13_z14_z15 = IMPLICIT_DEF
$z16_z17_z18_z19 = IMPLICIT_DEF
$z20_z21_z22_z23 = IMPLICIT_DEF
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
$z0_z1 = COPY %0
RET_ReallyLR
...
---
name: spills_fills_stack_id_zpr3
tracksRegLiveness: true
registers:
- { id: 0, class: zpr3 }
stack:
liveins:
- { reg: '$z0_z1_z2', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $z0_z1_z2
; CHECK-LABEL: name: spills_fills_stack_id_zpr3
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16
; CHECK-NEXT: stack-id: sve-vec
; EXPAND-LABEL: name: spills_fills_stack_id_zpr3
; EXPAND: STR_ZXI $z0, $sp, 0
; EXPAND: STR_ZXI $z1, $sp, 1
; EXPAND: STR_ZXI $z2, $sp, 2
; EXPAND: $z0 = LDR_ZXI $sp, 0
; EXPAND: $z1 = LDR_ZXI $sp, 1
; EXPAND: $z2 = LDR_ZXI $sp, 2
%0:zpr3 = COPY $z0_z1_z2
$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
$z8_z9_z10_z11 = IMPLICIT_DEF
$z12_z13_z14_z15 = IMPLICIT_DEF
$z16_z17_z18_z19 = IMPLICIT_DEF
$z20_z21_z22_z23 = IMPLICIT_DEF
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
$z0_z1_z2 = COPY %0
RET_ReallyLR
...
---
name: spills_fills_stack_id_zpr4
tracksRegLiveness: true
registers:
- { id: 0, class: zpr4 }
stack:
liveins:
- { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $z0_z1_z2_z3
; CHECK-LABEL: name: spills_fills_stack_id_zpr4
; CHECK: stack:
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
; CHECK-NEXT: stack-id: sve-vec
; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
; EXPAND: STR_ZXI $z0, $sp, 0
; EXPAND: STR_ZXI $z1, $sp, 1
; EXPAND: STR_ZXI $z2, $sp, 2
; EXPAND: STR_ZXI $z3, $sp, 3
; EXPAND: $z0 = LDR_ZXI $sp, 0
; EXPAND: $z1 = LDR_ZXI $sp, 1
; EXPAND: $z2 = LDR_ZXI $sp, 2
; EXPAND: $z3 = LDR_ZXI $sp, 3
%0:zpr4 = COPY $z0_z1_z2_z3
$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
$z8_z9_z10_z11 = IMPLICIT_DEF
$z12_z13_z14_z15 = IMPLICIT_DEF
$z16_z17_z18_z19 = IMPLICIT_DEF
$z20_z21_z22_z23 = IMPLICIT_DEF
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
$z0_z1_z2_z3 = COPY %0
RET_ReallyLR
...