From 494b8ac95a79031fc19266dea42d29e140444b47 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Fri, 8 Feb 2019 15:41:11 +0000 Subject: [PATCH] [AMDGPU] Fix CS scratch setup on pre-GCN3 ASICs Summary: Prior to GCN3 s_load_dword offsets are in dwords rather than bytes. Thus the scratch buffer descriptor offset must be adjusted for pre-GCN3 ASICs. Reviewers: nhaehnle, tpr Reviewed By: nhaehnle Subscribers: sheredom, arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D56496 llvm-svn: 353530 --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 4 +++- llvm/test/CodeGen/AMDGPU/amdpal.ll | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 175e2bd84a21..536884704006 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -422,9 +422,11 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineMemOperand::MODereferenceable, 16, 4); unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 
16 : 0; + const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); + unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset); BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) .addReg(Rsrc01) - .addImm(Offset) // offset + .addImm(EncodedOffset) // offset .addImm(0) // glc .addReg(ScratchRsrcReg, RegState::ImplicitDefine) .addMemOperand(MMO); diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll index 8d4c09cdeaf5..c6d082ded048 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefix=PAL --enable-var-scope %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefixes=PAL,CI --enable-var-scope %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tonga | FileCheck --check-prefixes=PAL,VI --enable-var-scope %s ; PAL-NOT: .AMDGPU.config ; PAL-LABEL: {{^}}simple: @@ -55,11 +56,13 @@ entry: ; Check code sequence for amdpal use of scratch for alloca in a compute shader. ; The scratch descriptor is loaded from offset 0x10 of the GIT, rather than offset ; 0 in a graphics shader. +; Prior to GCN3 s_load_dword offsets are dwords, so the offset will be 0x4. ; PAL-LABEL: {{^}}scratch2_cs: ; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234 ; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0 -; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x10 +; CI: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x4 +; VI: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]:{{[0-9]+\]}}, 0x10 ; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]: define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {