[RISCV] Avoid scalar outgoing arguments overwriting vector frame objects.

When FP is used to access stack objects, the scalable stack objects are
placed at the lower end of the frame. It looks like:

```
|-------------------|  <-- FP
| callee-saved regs |
|-------------------|
| scalar local vars |
|-------------------|
| RVV local vars    |
|-------------------|  <-- SP
```

If there are scalar arguments that need to be passed through memory and
there are vector objects on the stack that are accessed through FP, the
outgoing scalar arguments will overwrite the vector objects. It looks like:

```
|-------------------|  <-- FP
| callee-saved regs |
|-------------------|
| scalar local vars |
|-------------------|         |-------------------|
| RVV local vars    |         | outgoing args     | <- outgoing arguments
|-------------------|  <-- SP |-------------------|    overwrite from here.
```

In this patch, we reserve stack space for the outgoing arguments right
before each function call when FP is used to access the stack and there
are scalable vector frame objects. It looks like:

```
|-------------------|  <-- FP
| callee-saved regs |
|-------------------|
| scalar local vars |
|-------------------|
| RVV local vars    |
|-------------------|
| outgoing args     |
|-------------------|  <-- SP
```

Differential Revision: https://reviews.llvm.org/D103622
This commit is contained in:
Hsiangkai Wang 2021-06-03 23:45:49 +08:00
parent 54418c5a35
commit 643b6407fa
2 changed files with 27 additions and 13 deletions

View File

@ -911,11 +911,21 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
} }
} }
static bool hasRVVFrameObject(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
if (MFI.getStackID(I) == TargetStackID::ScalableVector)
return true;
return false;
}
// Not preserve stack space within prologue for outgoing variables when the // Not preserve stack space within prologue for outgoing variables when the
// function contains variable size objects and let eliminateCallFramePseudoInstr // function contains variable size objects or there are vector objects accessed
// preserve stack space for it. // by the frame pointer.
// Let eliminateCallFramePseudoInstr preserve stack space for it.
bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects(); return !MF.getFrameInfo().hasVarSizedObjects() &&
!(hasFP(MF) && hasRVVFrameObject(MF));
} }
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions. // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.

View File

@ -65,10 +65,10 @@ entry:
define dso_local signext i32 @main() #0 { define dso_local signext i32 @main() #0 {
; CHECK-LABEL: main: ; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -128 ; CHECK-NEXT: addi sp, sp, -112
; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: addi s0, sp, 128 ; CHECK-NEXT: addi s0, sp, 112
; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: sub sp, sp, a0
@ -84,7 +84,7 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: sub a0, s0, a0 ; CHECK-NEXT: sub a0, s0, a0
; CHECK-NEXT: addi a0, a0, -128 ; CHECK-NEXT: addi a0, a0, -112
; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: addi a0, zero, 1 ; CHECK-NEXT: addi a0, zero, 1
; CHECK-NEXT: sw a0, -68(s0) ; CHECK-NEXT: sw a0, -68(s0)
@ -102,7 +102,7 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub a2, s0, a2 ; CHECK-NEXT: sub a2, s0, a2
; CHECK-NEXT: addi a2, a2, -128 ; CHECK-NEXT: addi a2, a2, -112
; CHECK-NEXT: vl8re32.v v8, (a2) ; CHECK-NEXT: vl8re32.v v8, (a2)
; CHECK-NEXT: lw a2, -76(s0) ; CHECK-NEXT: lw a2, -76(s0)
; CHECK-NEXT: lw a3, -80(s0) ; CHECK-NEXT: lw a3, -80(s0)
@ -112,15 +112,17 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: lw a7, -96(s0) ; CHECK-NEXT: lw a7, -96(s0)
; CHECK-NEXT: lw t0, -100(s0) ; CHECK-NEXT: lw t0, -100(s0)
; CHECK-NEXT: lw t1, -104(s0) ; CHECK-NEXT: lw t1, -104(s0)
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd t1, 8(sp) ; CHECK-NEXT: sd t1, 8(sp)
; CHECK-NEXT: sd t0, 0(sp) ; CHECK-NEXT: sd t0, 0(sp)
; CHECK-NEXT: call lots_args ; CHECK-NEXT: call lots_args
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: lw a0, -68(s0) ; CHECK-NEXT: lw a0, -68(s0)
; CHECK-NEXT: lw a1, -72(s0) ; CHECK-NEXT: lw a1, -72(s0)
; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub a2, s0, a2 ; CHECK-NEXT: sub a2, s0, a2
; CHECK-NEXT: addi a2, a2, -128 ; CHECK-NEXT: addi a2, a2, -112
; CHECK-NEXT: vl8re32.v v8, (a2) ; CHECK-NEXT: vl8re32.v v8, (a2)
; CHECK-NEXT: lw a2, -76(s0) ; CHECK-NEXT: lw a2, -76(s0)
; CHECK-NEXT: lw a3, -80(s0) ; CHECK-NEXT: lw a3, -80(s0)
@ -130,16 +132,18 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: lw a7, -96(s0) ; CHECK-NEXT: lw a7, -96(s0)
; CHECK-NEXT: lw t0, -100(s0) ; CHECK-NEXT: lw t0, -100(s0)
; CHECK-NEXT: lw t1, -104(s0) ; CHECK-NEXT: lw t1, -104(s0)
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd t1, 8(sp) ; CHECK-NEXT: sd t1, 8(sp)
; CHECK-NEXT: sd t0, 0(sp) ; CHECK-NEXT: sd t0, 0(sp)
; CHECK-NEXT: call lots_args ; CHECK-NEXT: call lots_args
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: mv a0, zero ; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: add sp, sp, a1
; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 128 ; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret ; CHECK-NEXT: ret
entry: entry:
%retval = alloca i32, align 4 %retval = alloca i32, align 4