forked from OSchip/llvm-project
AMDGPU: Fix kernel argument alignment impacting stack size
Don't use AllocateStack because kernel arguments have nothing to do with the stack. The ensureMaxAlignment call was still changing the stack alignment. llvm-svn: 273080
This commit is contained in:
parent
e8fd9561cb
commit
e935f05a94
|
@ -110,7 +110,7 @@ def CC_R600 : CallingConv<[
|
|||
|
||||
// Calling convention for compute kernels
|
||||
def CC_AMDGPU_Kernel : CallingConv<[
|
||||
CCCustom<"allocateStack">
|
||||
CCCustom<"allocateKernArg">
|
||||
]>;
|
||||
|
||||
def CC_AMDGPU : CallingConv<[
|
||||
|
|
|
@ -31,13 +31,15 @@
|
|||
#include "SIInstrInfo.h"
|
||||
using namespace llvm;
|
||||
|
||||
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
CCValAssign::LocInfo LocInfo,
|
||||
ISD::ArgFlagsTy ArgFlags, CCState &State) {
|
||||
unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
|
||||
ArgFlags.getOrigAlign());
|
||||
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
|
||||
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
CCValAssign::LocInfo LocInfo,
|
||||
ISD::ArgFlagsTy ArgFlags, CCState &State) {
|
||||
MachineFunction &MF = State.getMachineFunction();
|
||||
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
||||
|
||||
uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
|
||||
ArgFlags.getOrigAlign());
|
||||
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
#include "AMDGPUMachineFunction.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Pin the vtable to this file.
|
||||
|
@ -10,8 +7,9 @@ void AMDGPUMachineFunction::anchor() {}
|
|||
|
||||
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
|
||||
MachineFunctionInfo(),
|
||||
KernArgSize(0),
|
||||
MaxKernArgAlign(0),
|
||||
LDSSize(0),
|
||||
ABIArgOffset(0),
|
||||
ScratchSize(0),
|
||||
IsKernel(true) {
|
||||
}
|
||||
IsKernel(true) {}
|
||||
|
|
|
@ -16,10 +16,25 @@
|
|||
namespace llvm {
|
||||
|
||||
class AMDGPUMachineFunction : public MachineFunctionInfo {
|
||||
uint64_t KernArgSize;
|
||||
unsigned MaxKernArgAlign;
|
||||
|
||||
virtual void anchor();
|
||||
|
||||
public:
|
||||
AMDGPUMachineFunction(const MachineFunction &MF);
|
||||
|
||||
/// Reserve \p Size bytes for a kernel argument and return its offset.
///
/// The running KernArgSize is first rounded up to a multiple of \p Align;
/// that rounded value is the returned offset, and KernArgSize then advances
/// past the new argument. MaxKernArgAlign is raised to \p Align when
/// \p Align is larger than any alignment seen so far.
///
/// \p Align must be a power of two (checked by assert).
uint64_t allocateKernArg(uint64_t Size, unsigned Align) {
  assert(isPowerOf2_32(Align));

  const uint64_t ArgOffset = alignTo(KernArgSize, Align);
  KernArgSize = ArgOffset + Size;

  if (MaxKernArgAlign < Align)
    MaxKernArgAlign = Align;

  return ArgOffset;
}
|
||||
|
||||
/// A map to keep track of local memory objects and their offsets within
|
||||
/// the local memory space.
|
||||
std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Test that the alignment of kernel arguments does not impact the
|
||||
; alignment of the stack
|
||||
|
||||
; CHECK-LABEL: {{^}}no_args:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; Baseline case: the function takes no arguments. It creates a single i8
; stack object and writes 0 to it with a volatile store.
define void @no_args() {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align32:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; Same body as @no_args, but with one unnamed <8 x i32> argument (32 bytes of
; data; presumably what forces the 32-byte argument alignment the name
; refers to -- confirm against the target data layout).
define void @force_align32(<8 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align64:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; Same body as @no_args, but with one unnamed <16 x i32> argument (64 bytes of
; data; presumably what forces the 64-byte argument alignment the name
; refers to -- confirm against the target data layout).
define void @force_align64(<16 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align128:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; Same body as @no_args, but with one unnamed <32 x i32> argument (128 bytes
; of data; presumably what forces the 128-byte argument alignment the name
; refers to -- confirm against the target data layout).
define void @force_align128(<32 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align256:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; Same body as @no_args, but with one unnamed <64 x i32> argument (256 bytes
; of data; presumably what forces the 256-byte argument alignment the name
; refers to -- confirm against the target data layout).
define void @force_align256(<64 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue