[RISCV] Fix offset computation for RVV

In D97111 we changed the RVV frame layout when using sp or bp to address
the stack slots so we could address the emergency stack slot. The idea
is to put the RVV objects as far as possible (in offset terms) from the
frame reference register (sp / fp / bp).

When using fp this happens naturally because the RVV objects are already
at the top of the stack and, due to the constraints of RVV (VLENB being a
power of two >= 128), the stack remains aligned. The rest of this summary
does not apply to this case.

When using sp / bp we need to skip the non-RVV stack slots. The size of
the non-RVV objects is computed by subtracting the callee-saved register
size (whose computation was added in D97111 itself) from the total size
of the stack (which does not account for RVV stack slots). However, the
callee-saved register size was rounded up to 16 bytes, so the subtraction
could yield a smaller offset that may belong to a scalar stack slot (see
D98801). So this change removes that rounding.

Also, because we want the RVV objects to be between the non-RVV stack
slots and the callee-saved register slots, we need to make sure the RVV
objects are properly aligned to 8 bytes. Adding a padding of 8 would
render the stack unaligned. So when allocating space for RVV (only when
we don't use fp) we need to have extra padding that preserves the stack
alignment. This way we can round to 8 bytes the offset that skips the
non-RVV objects and we do not misalign the whole stack in the process. In
some circumstances this means that the RVV objects may have padding
before (=lower offsets from sp/bp) and after (before the CSR stack
slots).

Differential Revision: https://reviews.llvm.org/D98802
This commit is contained in:
Roger Ferrer Ibanez 2021-03-18 10:26:33 +00:00
parent 3abd0bacc2
commit ef76a333fa
9 changed files with 122 additions and 76 deletions

View File

@ -323,7 +323,7 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF,
// 2. SP = SP - RVV stack size
BuildMI(MBB, MBBI, DL, TII->get(Opc), SPReg)
.addReg(SPReg)
.addReg(FactorRegister);
.addReg(FactorRegister, RegState::Kill);
}
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
@ -385,7 +385,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
// investigation. Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI.getStackSize();
uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding();
uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@ -560,7 +560,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
if (!CSI.empty())
LastFrameDestroy = std::prev(MBBI, CSI.size());
uint64_t StackSize = MFI.getStackSize();
uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding();
uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@ -637,7 +637,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (FirstSPAdjustAmount)
Offset += StackOffset::getFixed(FirstSPAdjustAmount);
else
Offset += StackOffset::getFixed(MFI.getStackSize());
Offset +=
StackOffset::getFixed(MFI.getStackSize() + RVFI->getRVVPadding());
} else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
// If the stack was realigned, the frame pointer is set in order to allow
// SP to be restored, so we need another base register to record the stack
@ -645,40 +646,63 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (hasBP(MF)) {
FrameReg = RISCVABI::getBPReg();
// |--------------------------| -- <-- FP
// | callee-saved registers | | <---------.
// |--------------------------| -- |
// | realignment (the size of | | |
// | this area is not counted | | |
// | in MFI.getStackSize()) | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// |--------------------------| -- |
// | scalar local variables | | <---------'
// | callee-saved registers | | <----.
// |--------------------------| -- |
// | realignment (the size of | | |
// | this area is not counted | | |
// | in MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- BP
// | VarSize objects | |
// |--------------------------| -- <-- SP
} else {
FrameReg = RISCV::X2;
// |--------------------------| -- <-- FP
// | callee-saved registers | | <---------.
// |--------------------------| -- |
// | realignment (the size of | | |
// | this area is not counted | | |
// | in MFI.getStackSize()) | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// |--------------------------| -- |
// | scalar local variables | | <---------'
// | callee-saved registers | | <----.
// |--------------------------| -- |
// | realignment (the size of | | |
// | this area is not counted | | |
// | in MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- SP
}
// The total amount of padding surrounding RVV objects is described by
// RVFI->getRVVPadding() and it can be zero. It allows us to align the RVV
// objects to 8 bytes.
if (MFI.getStackID(FI) == TargetStackID::Default) {
Offset += StackOffset::getFixed(MFI.getStackSize());
if (FI < 0)
Offset += StackOffset::getFixed(RVFI->getLibCallStackSize());
} else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
Offset +=
StackOffset::get(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(),
RVFI->getRVVStackSize());
Offset += StackOffset::get(
alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8),
RVFI->getRVVStackSize());
}
} else {
FrameReg = RI->getFrameRegister(MF);
@ -704,20 +728,34 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// When using SP to access frame objects, we need to add RVV stack size.
//
// |--------------------------| -- <-- FP
// | callee-saved registers | |<--------.
// |--------------------------| -- |
// | RVV objects | | |-- MFI.getStackSize()
// |--------------------------| -- |
// | scalar local variables | |<--------'
// | callee-saved registers | | <----.
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | RVV objects | | |-- MFI.getStackSize()
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
// | MFI.getStackSize() | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- SP
//
// The total amount of padding surrounding RVV objects is described by
// RVFI->getRVVPadding() and it can be zero. It allows us to align the RVV
// objects to 8 bytes.
if (MFI.getStackID(FI) == TargetStackID::Default) {
Offset += StackOffset::getFixed(MFI.getStackSize());
if (FI < 0)
Offset += StackOffset::getFixed(RVFI->getLibCallStackSize());
} else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
Offset += StackOffset::get(MFI.getStackSize() -
RVFI->getCalleeSavedStackSize(),
RVFI->getRVVStackSize());
Offset += StackOffset::get(
alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8),
RVFI->getRVVStackSize());
}
}
}
@ -822,32 +860,29 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
RegInfo->getSpillAlign(*RC), false);
RS->addScavengingFrameIndex(RegScavFI);
}
}
void RISCVFrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS) const {
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) {
RVFI->setCalleeSavedStackSize(0);
return;
}
int64_t MinOffset = std::numeric_limits<int64_t>::max();
int64_t MaxOffset = std::numeric_limits<int64_t>::min();
unsigned Size = 0;
for (const auto &Info : MFI.getCalleeSavedInfo()) {
int FrameIdx = Info.getFrameIdx();
if (MFI.getStackID(FrameIdx) != TargetStackID::Default)
continue;
int64_t Offset = MFI.getObjectOffset(FrameIdx);
int64_t ObjSize = MFI.getObjectSize(FrameIdx);
MinOffset = std::min<int64_t>(Offset, MinOffset);
MaxOffset = std::max<int64_t>(Offset + ObjSize, MaxOffset);
Size += MFI.getObjectSize(FrameIdx);
}
unsigned Size = alignTo(MaxOffset - MinOffset, 16);
RVFI->setCalleeSavedStackSize(Size);
// Padding required to keep the RVV stack aligned to 8 bytes
// within the main stack. We only need this when not using FP.
if (RVVStackSize && !hasFP(MF) && Size % 8 != 0) {
// Because we add the padding to the size of the stack, adding
// getStackAlign() will keep it aligned.
RVFI->setRVVPadding(getStackAlign().value());
}
}
// Not preserve stack space within prologue for outgoing variables when the

View File

@ -68,9 +68,6 @@ public:
bool isSupportedStackID(TargetStackID::Value ID) const override;
TargetStackID::Value getStackIDForScalableVectors() const override;
void processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const override;
protected:
const RISCVSubtarget &STI;

View File

@ -34,6 +34,8 @@ private:
unsigned LibCallStackSize = 0;
/// Size of RVV stack.
uint64_t RVVStackSize = 0;
/// Padding required to keep RVV stack aligned within the main stack.
uint64_t RVVPadding = 0;
/// Size of stack frame to save callee saved registers
unsigned CalleeSavedStackSize = 0;
@ -66,6 +68,9 @@ public:
uint64_t getRVVStackSize() const { return RVVStackSize; }
void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; }
uint64_t getRVVPadding() const { return RVVPadding; }
void setRVVPadding(uint64_t Padding) { RVVPadding = Padding; }
unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
};

View File

@ -256,15 +256,15 @@ define void @local_var_m2_with_bp(i64 %n) {
; RV64IV-NEXT: csrr a2, vlenb
; RV64IV-NEXT: slli a2, a2, 1
; RV64IV-NEXT: add a2, s1, a2
; RV64IV-NEXT: addi a2, a2, 224
; RV64IV-NEXT: addi a2, a2, 232
; RV64IV-NEXT: call notdead2@plt
; RV64IV-NEXT: lw a0, 124(s1)
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: add a0, s1, a0
; RV64IV-NEXT: addi a0, a0, 224
; RV64IV-NEXT: addi a0, a0, 232
; RV64IV-NEXT: vl2r.v v26, (a0)
; RV64IV-NEXT: addi a0, s1, 224
; RV64IV-NEXT: addi a0, s1, 232
; RV64IV-NEXT: vl2r.v v26, (a0)
; RV64IV-NEXT: lw a0, 120(s1)
; RV64IV-NEXT: addi sp, s0, -256

View File

@ -9,50 +9,56 @@
define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double> %c, i32 %gvl) nounwind
; SPILL-O0-LABEL: foo:
; SPILL-O0: # %bb.0:
; SPILL-O0-NEXT: addi sp, sp, -16
; SPILL-O0-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; SPILL-O0-NEXT: addi sp, sp, -32
; SPILL-O0-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: slli a1, a1, 1
; SPILL-O0-NEXT: sub sp, sp, a1
; SPILL-O0-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: add a1, sp, a1
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; SPILL-O0-NEXT: vfadd.vv v25, v8, v9
; SPILL-O0-NEXT: vs1r.v v25, (sp) # Unknown-size Folded Spill
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: lui a0, %hi(.L.str)
; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str)
; SPILL-O0-NEXT: call puts@plt
; SPILL-O0-NEXT: vl1r.v v25, (sp) # Unknown-size Folded Reload
; SPILL-O0-NEXT: addi a1, sp, 16
; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: add a1, sp, a1
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; SPILL-O0-NEXT: vfadd.vv v8, v8, v25
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; SPILL-O0-NEXT: addi sp, sp, 16
; SPILL-O0-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; SPILL-O0-NEXT: addi sp, sp, 32
; SPILL-O0-NEXT: ret
;
; SPILL-O2-LABEL: foo:
; SPILL-O2: # %bb.0:
; SPILL-O2-NEXT: addi sp, sp, -16
; SPILL-O2-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 1
; SPILL-O2-NEXT: sub sp, sp, a1
; SPILL-O2-NEXT: mv s0, a0
; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill
; SPILL-O2-NEXT: addi a1, sp, 8
; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; SPILL-O2-NEXT: vfadd.vv v25, v8, v9
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: add a0, sp, a0
; SPILL-O2-NEXT: addi a0, a0, 8
; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: lui a0, %hi(.L.str)
; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str)
@ -60,8 +66,10 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: add a0, sp, a0
; SPILL-O2-NEXT: addi a0, a0, 8
; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: vl1r.v v26, (sp) # Unknown-size Folded Reload
; SPILL-O2-NEXT: addi a0, sp, 8
; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: vfadd.vv v8, v26, v25
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
@ -78,4 +86,4 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
}
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i32 %gvl)
declare i32 @puts(i8*);
declare i32 @puts(i8*);

View File

@ -17,22 +17,22 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: add a1, sp, a1
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: addi a1, a1, 24
; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; SPILL-O0-NEXT: vfadd.vv v25, v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: addi a0, sp, 24
; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: lui a0, %hi(.L.str)
; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str)
; SPILL-O0-NEXT: call puts@plt
; SPILL-O0-NEXT: addi a1, sp, 16
; SPILL-O0-NEXT: addi a1, sp, 24
; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a1, vlenb
; SPILL-O0-NEXT: add a1, sp, a1
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: addi a1, a1, 24
; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; SPILL-O0-NEXT: vfadd.vv v8, v8, v25

View File

@ -107,9 +107,9 @@ define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind {
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: add a2, s1, a2
; CHECK-NEXT: addi a2, a2, 96
; CHECK-NEXT: addi a2, a2, 104
; CHECK-NEXT: vl1re64.v v25, (a2)
; CHECK-NEXT: addi a2, s1, 96
; CHECK-NEXT: addi a2, s1, 104
; CHECK-NEXT: vl2re64.v v26, (a2)
; CHECK-NEXT: lw a2, 64(s1)
; CHECK-NEXT: slli a1, a1, 2

View File

@ -9,18 +9,19 @@
define void @foo() #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw s9, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw s9, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: vs2r.v v30, (sp) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: lw s9, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: lw s9, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
ret void

View File

@ -15,7 +15,7 @@
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: addi a0, sp, 24
; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1