Hexagon: derive the maximum stack alignment from the live frame objects.

MachineFrameInfo's recorded maximum alignment can only grow, but the
alignment of vector spill slots may be lowered again after register
allocation. This patch adds HexagonFrameLowering::getMaxStackAlignment(),
which scans the non-dead, non-fixed frame objects (indices 0 up to
getObjectIndexEnd()), and uses it in insertPrologueInBlock(), needsAligna()
and the new HexagonRegisterInfo::needsStackRealignment(). A regression test
(stack-align-reset.ll) is included.

Note: the line structure and indentation of this patch were reconstructed
from a whitespace-collapsed copy; verify context-line whitespace against the
target tree (or apply with whitespace tolerance) before relying on it.

diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index f0b7f5873797..601a5d303cdb 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -552,7 +552,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
   auto &HRI = *HST.getRegisterInfo();
   DebugLoc dl;
 
-  unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
+  unsigned MaxAlign = std::max(getMaxStackAlignment(MF), getStackAlignment());
 
   // Calculate the total stack frame size.
   // Get the number of bytes to allocate from the FrameInfo.
@@ -2354,11 +2354,26 @@ void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
   }
 }
 
+unsigned
+HexagonFrameLowering::getMaxStackAlignment(const MachineFunction &MF) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  // MFI's MaxAlignment can only grow, but we can actually reduce it
+  // for vector spills.
+  unsigned MaxAlign = 0;
+  for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+    if (MFI.isDeadObjectIndex(i))
+      continue;
+    unsigned Align = MFI.getObjectAlignment(i);
+    MaxAlign = std::max(MaxAlign, Align);
+  }
+  return MaxAlign;
+}
+
 bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   if (!MFI.hasVarSizedObjects())
     return false;
-  unsigned MaxA = MFI.getMaxAlignment();
+  unsigned MaxA = getMaxStackAlignment(MF);
   if (MaxA <= getStackAlignment())
     return false;
   return true;
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 529a61d4a5b5..29bafaee78ce 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -82,6 +82,8 @@ public:
       const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI)
       const override;
 
+  unsigned getMaxStackAlignment(const MachineFunction &MF) const;
+
   bool needsAligna(const MachineFunction &MF) const;
   const MachineInstr *getAlignaInstr(const MachineFunction &MF) const;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 1fc157900ed5..b44177b0a359 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -283,6 +283,36 @@ bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
   return MF.getSubtarget().getFrameLowering()->hasFP(MF);
 }
 
+// The stack alignment on Hexagon can actually decrease in some cases,
+// specifically in some subset of cases when a variable-sized stack object
+// is present.
+// The issue is two-fold:
+// First of all, if there is a variable-sized object and the stack needs
+// extra alignment (due to pre-existing local objects), then a special
+// register will be reserved up front, acting as the aligned stack pointer
+// (call it AP). This register is only guaranteed to be live for accessing
+// these pre-existing local objects (the ones with the higher alignment).
+// Now, if the register allocator introduces vector register spills, their
+// spill slots will initially have an alignment equal to the register size,
+// which is higher than the normal stack alignment. Ideally, they should be
+// loaded/stored using AP, but AP may not be available at all required
+// places. To avoid this issue, the vector spill slots will have their
+// alignment lowered to 8, and they will be loaded/stored using unaligned
+// instructions.
+//
+// The lowering of the stack alignment may happen if the stack had a
+// variable-sized object, but otherwise retained its default alignment
+// up until register allocation. If the register allocator introduces
+// a vector spill, it will cause the max stack alignment to grow
+// (inside MachineFrameInfo). When the alignment of the spills is reset
+// back to the default stack alignment, MFI's max stack alignment will
+// not reflect that (since it cannot be lowered). Relying on that during
+// frame lowering will cause an unnecessary stack realignment.
+bool HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF)
+      const {
+  auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
+  return HFI.getMaxStackAlignment(MF) > HFI.getStackAlignment();
+}
 
 unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
   return Hexagon::R6;
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 5f65fad2cc04..568bdd0b8065 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -55,6 +55,8 @@ public:
     return true;
   }
 
+  bool needsStackRealignment(const MachineFunction &MF) const;
+
   /// Returns true if the frame pointer is valid.
   bool useFPForScavengingIndex(const MachineFunction &MF) const override;
 
diff --git a/llvm/test/CodeGen/Hexagon/stack-align-reset.ll b/llvm/test/CodeGen/Hexagon/stack-align-reset.ll
new file mode 100644
index 000000000000..0d028fb95b24
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/stack-align-reset.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; This used to crash.
+; CHECK: call f1
+
+target triple = "hexagon-unknown--elf"
+
+%struct.0 = type { [5 x i32] }
+%struct.2 = type { i32, i32, i32, %struct.1* }
+%struct.1 = type { i16*, i32, i32, i32 }
+
+@g0 = external hidden unnamed_addr constant [52 x i8], align 1
+@g1 = external hidden unnamed_addr constant [3 x i8], align 1
+
+declare extern_weak void @f0(i32, i8*, i32, i8*, ...) #0
+declare void @f1(%struct.0*, i32) #0
+
+define void @fred(i8* %a0) #0 {
+b1:
+  %v2 = alloca %struct.0, align 4
+  %v3 = alloca %struct.2, i32 undef, align 8
+  br i1 undef, label %b5, label %b4
+
+b4:                                               ; preds = %b1
+  br label %b7
+
+b5:                                               ; preds = %b5, %b1
+  %v6 = getelementptr inbounds %struct.2, %struct.2* %v3, i32 undef, i32 3
+  store %struct.1* undef, %struct.1** %v6, align 4
+  br label %b5
+
+b7:                                               ; preds = %b10, %b4
+  %v8 = call i32 @llvm.hexagon.V6.extractw(<16 x i32> zeroinitializer, i32 0)
+  br i1 icmp eq (void (i32, i8*, i32, i8*, ...)* @f0, void (i32, i8*, i32, i8*, ...)* null), label %b11, label %b9
+
+b9:                                               ; preds = %b7
+  call void (i32, i8*, i32, i8*, ...) @f0(i32 2, i8* getelementptr inbounds ([52 x i8], [52 x i8]* @g0, i32 0, i32 0), i32 2346, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @g1, i32 0, i32 0), i32 %v8)
+  unreachable
+
+b10:                                              ; preds = %b11
+  call void @f1(%struct.0* nonnull %v2, i32 28)
+  br label %b7
+
+b11:                                              ; preds = %b11, %b7
+  br i1 undef, label %b10, label %b11
+}
+
+declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" }
+attributes #1 = { nounwind readnone }