From 96bbd359edbf8582fc2d29b57d7e65e54e98709b Mon Sep 17 00:00:00 2001
From: Bradley Smith <bradley.smith@arm.com>
Date: Tue, 26 Apr 2022 12:19:32 +0000
Subject: [PATCH] [AArch64][SVE] Only fold frame indexes referencing SVE
 objects into SVE loads/stores

Currently we always fold frame indexes into SVE load/store instructions;
however, these instructions can only encode VL-scaled offsets. This means
that when we access a fixed length stack object with these instructions,
the folded-in frame index gets pulled back out during frame lowering, which
can fail when we have no spare registers and no emergency spill slot.
Rather than risk such failures, don't fold in frame indexes that reference
fixed length objects.
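For reference, the SVE scalar-plus-immediate load/store forms encode their
offset as a signed multiple of VL: the immediate lies in [-8, 7] and is
scaled by the vector length. As a minimal illustration of the constraint
(hand-written assembly, not output taken from this patch's tests):

  ld1d { z0.d }, p0/z, [x8, #1, mul vl]  // loads from x8 + 1 * VL bytes
  st1d { z0.d }, p0, [sp, #-8, mul vl]   // stores to sp - 8 * VL bytes

A fixed length object lives at a plain byte offset from the frame pointer,
which is in general not a multiple of VL, so no such immediate exists for
it; this is why a folded-in frame index must be pulled back out again
during frame lowering.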
Fixes: #55041

Differential Revision: https://reviews.llvm.org/D124457
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 18 +++++++---
 .../sve-fixed-length-frame-offests-crash.ll   | 36 +++++++++++++++++++
 .../AArch64/sve-fixed-length-frame-offests.ll | 31 ++++++++++++++++
 3 files changed, 81 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c367d2db853d..71911b6bc614 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5092,12 +5092,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                    SDValue &OffImm) {
   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
   const DataLayout &DL = CurDAG->getDataLayout();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
 
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
-    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
-    return true;
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+      return true;
+    }
+
+    return false;
   }
 
   if (MemVT == EVT())
@@ -5124,7 +5131,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
   Base = N.getOperand(0);
   if (Base.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+    // We can only encode VL scaled offsets, so only fold in frame indexes
+    // referencing SVE objects.
+    if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
   }
 
   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
new file mode 100644
index 000000000000..da11e6b36f35
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure we don't crash by trying to fold fixed length frame indexes into
+; loads/stores that don't support an appropriate addressing mode, hence creating
+; too many extra vregs during frame lowering, when we don't have an emergency
+; spill slot.
+
+define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+                             i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14, i64* %v15, i64* %v16,
+                             i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
+                             i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* %v31, i64* %v32,
+                             i64* %v33, i64* %v34, i64* %v35, i64* %v36, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
+                             i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* %v46, i64* %v47, i64* %v48,
+                             i64 %v49) #0 {
+; CHECK-LABEL: func1
+  tail call void @func2(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8,
+                        i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* undef, i64* %v14, i64* %v15, i64* %v16,
+                        i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24,
+                        i64* %v25, i64* %v26, i64* %v27, i64* %v28, i64* %v29, i64* %v30, i64* undef, i64* undef,
+                        i64* undef, i64* undef, i64* undef, i64* undef, i64* %v37, i64* %v38, i64* %v39, i64* %v40,
+                        i64* %v41, i64* %v42, i64* %v43, i64* %v44, i64* %v45, i64* undef, i64* %v47, i64* %v48,
+                        i64 undef)
+  ret void
+}
+
+declare dso_local void @func2(i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64*, i64*, i64*, i64*, i64*, i64*, i64*, i64*,
+                              i64)
+
+attributes #0 = { "target-features"="+sve" vscale_range(2,2) }
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
new file mode 100644
index 000000000000..9227c4caf0cd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that only zero-offset frame indexes are folded into SVE loads/stores
+; when accessing fixed width objects.
+define void @foo(<8 x i64>* %a) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       SelectionDAG has 14 nodes:
+; CHECK-NEXT:    t0: ch = EntryToken
+; CHECK-NEXT:    t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
+; CHECK-NEXT:    t2: i64,ch = CopyFromReg t0, Register:i64 %0
+; CHECK-NEXT:    t18: nxv2i64,ch = LD1D_IMM t12, t2, TargetConstant:i64<0>, t0
+; CHECK-NEXT:    t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
+; CHECK-NEXT:    t17: ch = ST1D_IMM t18, t12, TargetFrameIndex:i64<0>, TargetConstant:i64<0>, t0
+; CHECK-NEXT:    t16: ch = ST1D_IMM t18, t12, t8, TargetConstant:i64<0>, t17
+; CHECK-NEXT:    t10: ch = RET_ReallyLR t16
+; CHECK-EMPTY:
+entry:
+  %r0 = alloca <8 x i64>
+  %r1 = alloca <8 x i64>
+  %r = load volatile <8 x i64>, <8 x i64>* %a
+  store volatile <8 x i64> %r, <8 x i64>* %r0
+  store volatile <8 x i64> %r, <8 x i64>* %r1
+  ret void
+}
+
+attributes #0 = { nounwind "target-features"="+sve" vscale_range(4,4) }