[AArch64][SVE] Only fold frame indexes referencing SVE objects into SVE loads/stores

Currently we always fold frame indexes into SVE load/store instructions;
however, these instructions can only encode VL-scaled offsets. This means
that when we are accessing a fixed-length stack object with these
instructions, the folded-in frame index gets pulled back out during frame
lowering. This can cause issues when we have no spare registers and no
emergency spill slot.

To avoid such issues, don't fold in frame indexes that reference
fixed-length objects.
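
For illustration, a minimal sketch in the spirit of the tests added below
(the function name is made up; vscale_range(4,4) makes the 512-bit
<8 x i64> a fixed-length object that is nonetheless lowered through SVE
loads/stores):

; The store lowers to an SVE ST1D, but %buf is an ordinary fixed-length
; stack object, so before this change its frame index was folded into the
; ST1D and then pulled back out again during frame lowering.
define void @sketch(<8 x i64> %v) #0 {
  %buf = alloca <8 x i64>
  store volatile <8 x i64> %v, <8 x i64>* %buf
  ret void
}
attributes #0 = { "target-features"="+sve" vscale_range(4,4) }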

Fixes: #55041

Differential Revision: https://reviews.llvm.org/D124457
Bradley Smith 2022-04-26 12:19:32 +00:00
parent f496a0eba4
commit 96bbd359ed
3 changed files with 81 additions and 4 deletions


@@ -5092,12 +5092,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                    SDValue &OffImm) {
   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
   const DataLayout &DL = CurDAG->getDataLayout();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
 
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
-    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
-    return true;
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+      return true;
+    }
+
+    return false;
   }
 
   if (MemVT == EVT())
@@ -5124,7 +5131,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
   Base = N.getOperand(0);
   if (Base.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
   }
 
   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);


@@ -0,0 +1,36 @@
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Ensure we don't crash by trying to fold fixed-length frame indexes into
; loads/stores that don't support an appropriate addressing mode; doing so
; creates too many extra vregs during frame lowering, and here there is no
; emergency spill slot to fall back on.
define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14, i64* %v15, i64* %v16,
i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32,
i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48,
i64 %v49) #0 {
; CHECK-LABEL: func1
tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16,
i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef,
i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48,
i64 undef)
ret void
}
declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
i64)
attributes #0 = { "target-features"="+sve" vscale_range(2,2) }


@@ -0,0 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
target triple = "aarch64-unknown-linux-gnu"
; Ensure that only no-offset frame indexes are folded into SVE loads/stores
; when accessing fixed-width objects.
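; Below, %r0 is frame index 0, which remains foldable, so it appears
; directly as an operand of the first ST1D_IMM; %r1 is frame index 1, a
; fixed-length object, so its address is materialised separately (the
; ADDXri node t8) rather than being folded into the second ST1D_IMM.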
define void @foo(<8 x i64>* %a) #0 {
; CHECK-LABEL: foo:
; CHECK: SelectionDAG has 14 nodes:
; CHECK-NEXT: t0: ch = EntryToken
; CHECK-NEXT: t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
; CHECK-NEXT: t18: nxv2i64,ch = LD1D_IMM<Mem:(volatile load (s512) from %ir.a)> t12, t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t17: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r0)> t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t0
; CHECK-NEXT: t16: ch = ST1D_IMM<Mem:(volatile store (s512) into %ir.r1)> t18, t12, t8, TargetConstant:i64<0>, t17
; CHECK-NEXT: t10: ch = RET_ReallyLR t16
; CHECK-EMPTY:
entry:
%r0 = alloca <8 x i64>
%r1 = alloca <8 x i64>
%r = load volatile <8 x i64>, <8 x i64>* %a
store volatile <8 x i64> %r, <8 x i64>* %r0
store volatile <8 x i64> %r, <8 x i64>* %r1
ret void
}
attributes #0 = { nounwind "target-features"="+sve" vscale_range(4,4) }