forked from OSchip/llvm-project
[AArch64][SVE] Add support for spilling/filling ZPR2/3/4
Summary: This patch enables the register allocator to spill/fill lists of 2, 3 and 4 SVE vector registers to/from the stack. This is implemented with pseudo instructions that get expanded to individual LDR_ZXI/STR_ZXI instructions in AArch64ExpandPseudoInsts. Patch by Sander de Smalen. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D75988
This commit is contained in:
parent
c010d4d195
commit
8a397b66b2
|
@ -80,6 +80,9 @@ private:
|
|||
bool expandSetTagLoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
MachineBasicBlock::iterator &NextMBBI);
|
||||
bool expandSVESpillFill(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, unsigned Opc,
|
||||
unsigned N);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
@ -595,6 +598,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Expand a ZPR2/ZPR3/ZPR4 spill or fill pseudo (STR_ZZXI..STR_ZZZZXI /
/// LDR_ZZXI..LDR_ZZZZXI) into \p N consecutive single-vector STR_ZXI or
/// LDR_ZXI instructions, one per Z sub-register of the tuple.
///
/// \param MBB   Block containing the pseudo.
/// \param MBBI  Iterator pointing at the pseudo to expand.
/// \param Opc   Either AArch64::LDR_ZXI or AArch64::STR_ZXI.
/// \param N     Number of Z registers in the tuple (2, 3 or 4).
/// \returns true (the pseudo is always expanded and erased).
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Idx = 0; Idx != N; ++Idx) {
    // Successive tuple elements live one scalable-vector slot apart.
    int ImmOffset = MI.getOperand(2).getImm() + Idx;
    // The base register may only be marked killed on the last expanded
    // instruction, since every earlier one still reads it.
    bool IsFinal = (Idx + 1 == N);
    bool KillBase = IsFinal && MI.getOperand(1).isKill();
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    unsigned ZSub =
        TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Idx);
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        // A fill defines the sub-register; a spill only reads it.
        .addReg(ZSub, Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(KillBase))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
/// If MBBI references a pseudo instruction that should be expanded here,
|
||||
/// do the expansion and return true. Otherwise return false.
|
||||
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
|
@ -970,6 +995,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
|||
report_fatal_error(
|
||||
"Non-writeback variants of STGloop / STZGloop should not "
|
||||
"survive past PrologEpilogInserter.");
|
||||
case AArch64::STR_ZZZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
|
||||
case AArch64::STR_ZZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
|
||||
case AArch64::STR_ZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
|
||||
case AArch64::LDR_ZZZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
|
||||
case AArch64::LDR_ZZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
|
||||
case AArch64::LDR_ZZXI:
|
||||
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -2278,6 +2278,27 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
|||
MinOffset = -256;
|
||||
MaxOffset = 255;
|
||||
break;
|
||||
case AArch64::STR_ZZZZXI:
|
||||
case AArch64::LDR_ZZZZXI:
|
||||
Scale = TypeSize::Scalable(16);
|
||||
Width = SVEMaxBytesPerVector * 4;
|
||||
MinOffset = -256;
|
||||
MaxOffset = 252;
|
||||
break;
|
||||
case AArch64::STR_ZZZXI:
|
||||
case AArch64::LDR_ZZZXI:
|
||||
Scale = TypeSize::Scalable(16);
|
||||
Width = SVEMaxBytesPerVector * 3;
|
||||
MinOffset = -256;
|
||||
MaxOffset = 253;
|
||||
break;
|
||||
case AArch64::STR_ZZXI:
|
||||
case AArch64::LDR_ZZXI:
|
||||
Scale = TypeSize::Scalable(16);
|
||||
Width = SVEMaxBytesPerVector * 2;
|
||||
MinOffset = -256;
|
||||
MaxOffset = 254;
|
||||
break;
|
||||
case AArch64::LDR_PXI:
|
||||
case AArch64::STR_PXI:
|
||||
Scale = TypeSize::Scalable(2);
|
||||
|
@ -2984,6 +3005,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
|
||||
unsigned Opc = 0;
|
||||
bool Offset = true;
|
||||
unsigned StackID = TargetStackID::Default;
|
||||
switch (TRI->getSpillSize(*RC)) {
|
||||
case 1:
|
||||
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
|
||||
|
@ -2992,6 +3014,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
case 2:
|
||||
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
|
||||
Opc = AArch64::STRHui;
|
||||
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_PXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
|
||||
|
@ -3031,6 +3058,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
get(AArch64::STPXi), SrcReg, isKill,
|
||||
AArch64::sube64, AArch64::subo64, FI, MMO);
|
||||
return;
|
||||
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_ZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 24:
|
||||
|
@ -3049,6 +3080,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Twov2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_ZZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 48:
|
||||
|
@ -3056,6 +3091,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Threev2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_ZZZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
|
@ -3063,19 +3102,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Fourv2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_ZZZZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
}
|
||||
unsigned StackID = TargetStackID::Default;
|
||||
if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_PXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
|
||||
Opc = AArch64::STR_ZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
assert(Opc && "Unknown register class");
|
||||
MFI.setStackID(FI, StackID);
|
||||
|
||||
|
@ -3126,6 +3159,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
|
||||
unsigned Opc = 0;
|
||||
bool Offset = true;
|
||||
unsigned StackID = TargetStackID::Default;
|
||||
switch (TRI->getSpillSize(*RC)) {
|
||||
case 1:
|
||||
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
|
||||
|
@ -3134,6 +3168,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
case 2:
|
||||
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
|
||||
Opc = AArch64::LDRHui;
|
||||
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_PXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
|
||||
|
@ -3173,6 +3212,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
get(AArch64::LDPXi), DestReg, AArch64::sube64,
|
||||
AArch64::subo64, FI, MMO);
|
||||
return;
|
||||
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_ZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 24:
|
||||
|
@ -3191,6 +3234,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Twov2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_ZZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 48:
|
||||
|
@ -3198,6 +3245,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Threev2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_ZZZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
|
@ -3205,20 +3256,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Fourv2d;
|
||||
Offset = false;
|
||||
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_ZZZZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned StackID = TargetStackID::Default;
|
||||
if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_PXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
|
||||
Opc = AArch64::LDR_ZXI;
|
||||
StackID = TargetStackID::SVEVector;
|
||||
}
|
||||
assert(Opc && "Unknown register class");
|
||||
MFI.setStackID(FI, StackID);
|
||||
|
||||
|
|
|
@ -1334,6 +1334,20 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
|
||||
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
|
||||
|
||||
// Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4.
|
||||
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
|
||||
// AArch64ExpandPseudoInsts.
|
||||
let mayLoad = 1, hasSideEffects = 0 in {
|
||||
def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
}
|
||||
let mayStore = 1, hasSideEffects = 0 in {
|
||||
def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
|
||||
}
|
||||
|
||||
def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
|
||||
(PTEST_PP PPR:$pg, PPR:$src)>;
|
||||
def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
|
||||
|
|
|
@ -8,6 +8,9 @@
|
|||
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
|
||||
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+sve" }
|
||||
|
||||
|
@ -90,3 +93,120 @@ body: |
|
|||
$z0 = COPY %0
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: spills_fills_stack_id_zpr2
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: zpr2 }
|
||||
stack:
|
||||
liveins:
|
||||
- { reg: '$z0_z1', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0_z1
|
||||
|
||||
; CHECK-LABEL: name: spills_fills_stack_id_zpr2
|
||||
; CHECK: stack:
|
||||
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16
|
||||
; CHECK-NEXT: stack-id: sve-vec
|
||||
|
||||
; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
|
||||
; EXPAND: STR_ZXI $z0, $sp, 0
|
||||
; EXPAND: STR_ZXI $z1, $sp, 1
|
||||
; EXPAND: $z0 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 1
|
||||
|
||||
%0:zpr2 = COPY $z0_z1
|
||||
|
||||
$z0_z1_z2_z3 = IMPLICIT_DEF
|
||||
$z4_z5_z6_z7 = IMPLICIT_DEF
|
||||
$z8_z9_z10_z11 = IMPLICIT_DEF
|
||||
$z12_z13_z14_z15 = IMPLICIT_DEF
|
||||
$z16_z17_z18_z19 = IMPLICIT_DEF
|
||||
$z20_z21_z22_z23 = IMPLICIT_DEF
|
||||
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||
|
||||
$z0_z1 = COPY %0
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: spills_fills_stack_id_zpr3
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: zpr3 }
|
||||
stack:
|
||||
liveins:
|
||||
- { reg: '$z0_z1_z2', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0_z1_z2
|
||||
|
||||
; CHECK-LABEL: name: spills_fills_stack_id_zpr3
|
||||
; CHECK: stack:
|
||||
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16
|
||||
; CHECK-NEXT: stack-id: sve-vec
|
||||
|
||||
; EXPAND-LABEL: name: spills_fills_stack_id_zpr3
|
||||
; EXPAND: STR_ZXI $z0, $sp, 0
|
||||
; EXPAND: STR_ZXI $z1, $sp, 1
|
||||
; EXPAND: STR_ZXI $z2, $sp, 2
|
||||
; EXPAND: $z0 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 1
|
||||
; EXPAND: $z2 = LDR_ZXI $sp, 2
|
||||
|
||||
%0:zpr3 = COPY $z0_z1_z2
|
||||
|
||||
$z0_z1_z2_z3 = IMPLICIT_DEF
|
||||
$z4_z5_z6_z7 = IMPLICIT_DEF
|
||||
$z8_z9_z10_z11 = IMPLICIT_DEF
|
||||
$z12_z13_z14_z15 = IMPLICIT_DEF
|
||||
$z16_z17_z18_z19 = IMPLICIT_DEF
|
||||
$z20_z21_z22_z23 = IMPLICIT_DEF
|
||||
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||
|
||||
$z0_z1_z2 = COPY %0
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: spills_fills_stack_id_zpr4
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: zpr4 }
|
||||
stack:
|
||||
liveins:
|
||||
- { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0_z1_z2_z3
|
||||
|
||||
; CHECK-LABEL: name: spills_fills_stack_id_zpr4
|
||||
; CHECK: stack:
|
||||
; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16
|
||||
; CHECK-NEXT: stack-id: sve-vec
|
||||
|
||||
; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
|
||||
; EXPAND: STR_ZXI $z0, $sp, 0
|
||||
; EXPAND: STR_ZXI $z1, $sp, 1
|
||||
; EXPAND: STR_ZXI $z2, $sp, 2
|
||||
; EXPAND: STR_ZXI $z3, $sp, 3
|
||||
; EXPAND: $z0 = LDR_ZXI $sp, 0
|
||||
; EXPAND: $z1 = LDR_ZXI $sp, 1
|
||||
; EXPAND: $z2 = LDR_ZXI $sp, 2
|
||||
; EXPAND: $z3 = LDR_ZXI $sp, 3
|
||||
|
||||
%0:zpr4 = COPY $z0_z1_z2_z3
|
||||
|
||||
$z0_z1_z2_z3 = IMPLICIT_DEF
|
||||
$z4_z5_z6_z7 = IMPLICIT_DEF
|
||||
$z8_z9_z10_z11 = IMPLICIT_DEF
|
||||
$z12_z13_z14_z15 = IMPLICIT_DEF
|
||||
$z16_z17_z18_z19 = IMPLICIT_DEF
|
||||
$z20_z21_z22_z23 = IMPLICIT_DEF
|
||||
$z24_z25_z26_z27 = IMPLICIT_DEF
|
||||
$z28_z29_z30_z31 = IMPLICIT_DEF
|
||||
|
||||
$z0_z1_z2_z3 = COPY %0
|
||||
RET_ReallyLR
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue