AMDGPU: Fix kernel argument alignment impacting stack size

Don't use AllocateStack because kernel arguments have nothing
to do with the stack. The ensureMaxAlignment call was still
changing the stack alignment.

llvm-svn: 273080
This commit is contained in:
Matt Arsenault 2016-06-18 05:15:53 +00:00
parent e8fd9561cb
commit e935f05a94
5 changed files with 72 additions and 13 deletions

View File

@ -110,7 +110,7 @@ def CC_R600 : CallingConv<[
// Calling convention for compute kernels. Each list entry is a CCCustom
// action naming the C++ handler function that assigns the argument's location.
def CC_AMDGPU_Kernel : CallingConv<[
CCCustom<"allocateStack">
CCCustom<"allocateKernArg">
]>;
def CC_AMDGPU : CallingConv<[

View File

@ -31,13 +31,15 @@
#include "SIInstrInfo.h"
using namespace llvm;
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
MachineFunction &MF = State.getMachineFunction();
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}

View File

@ -1,8 +1,5 @@
#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;
// Pin the vtable to this file.
@ -10,8 +7,9 @@ void AMDGPUMachineFunction::anchor() {}
// Builds the per-function info with KernArgSize, MaxKernArgAlign, LDSSize,
// ABIArgOffset and ScratchSize all zeroed and IsKernel set to true. The MF
// argument is not read by the initializers shown here.
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
KernArgSize(0),
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
ScratchSize(0),
IsKernel(true) {
}
IsKernel(true) {}

View File

@ -16,10 +16,25 @@
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
uint64_t KernArgSize;
unsigned MaxKernArgAlign;
virtual void anchor();
public:
AMDGPUMachineFunction(const MachineFunction &MF);
/// Reserve \p Size bytes in the kernel argument area at the next offset that
/// is a multiple of \p Align.
///
/// KernArgSize is advanced past the new allocation, and MaxKernArgAlign is
/// raised to \p Align when \p Align is larger.
///
/// \param Size  Number of bytes to reserve.
/// \param Align Required alignment; asserted to be a power of two.
/// \returns The aligned offset at which the allocation starts.
uint64_t allocateKernArg(uint64_t Size, unsigned Align) {
  assert(isPowerOf2_32(Align));

  // Round the running size up first; that rounded value is the new offset.
  const uint64_t Offset = alignTo(KernArgSize, Align);
  KernArgSize = Offset + Size;
  MaxKernArgAlign = std::max(Align, MaxKernArgAlign);
  return Offset;
}
/// A map to keep track of local memory objects and their offsets within
/// the local memory space.
std::map<const GlobalValue *, unsigned> LocalMemoryObjects;

View File

@ -0,0 +1,44 @@
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; Test that the alignment of kernel arguments does not impact the
; alignment of the stack
; CHECK-LABEL: {{^}}no_args:
; CHECK: ScratchSize: 8{{$}}
; Baseline case: no arguments at all. The body allocates a single i8 and
; stores a volatile zero to it.
define void @no_args() {
%alloca = alloca i8
store volatile i8 0, i8* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align32:
; CHECK: ScratchSize: 8{{$}}
; One unnamed, unused <8 x i32> argument (8 x 4 = 32 bytes of data). The body
; is the same single-i8 alloca plus volatile store as in @no_args.
define void @force_align32(<8 x i32>) {
%alloca = alloca i8
store volatile i8 0, i8* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align64:
; CHECK: ScratchSize: 8{{$}}
; One unnamed, unused <16 x i32> argument (16 x 4 = 64 bytes of data). The body
; is the same single-i8 alloca plus volatile store as in @no_args.
define void @force_align64(<16 x i32>) {
%alloca = alloca i8
store volatile i8 0, i8* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align128:
; CHECK: ScratchSize: 8{{$}}
; One unnamed, unused <32 x i32> argument (32 x 4 = 128 bytes of data). The
; body is the same single-i8 alloca plus volatile store as in @no_args.
define void @force_align128(<32 x i32>) {
%alloca = alloca i8
store volatile i8 0, i8* %alloca
ret void
}
; CHECK-LABEL: {{^}}force_align256:
; CHECK: ScratchSize: 8{{$}}
; One unnamed, unused <64 x i32> argument (64 x 4 = 256 bytes of data). The
; body is the same single-i8 alloca plus volatile store as in @no_args.
define void @force_align256(<64 x i32>) {
%alloca = alloca i8
store volatile i8 0, i8* %alloca
ret void
}