[amdgpu] Increase alignment of all LDS variables

Currently the superalign option only increases the alignment of
variables that are moved into the module.lds block. Change that to all LDS
variables. Also only increase the alignment once, instead of once per function.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D115488
This commit is contained in:
Jon Chesterfield 2021-12-12 19:30:32 +00:00
parent d2377f24e1
commit 24b28db8cc
2 changed files with 52 additions and 29 deletions

View File

@ -164,8 +164,8 @@ public:
bool runOnModule(Module &M) override { bool runOnModule(Module &M) override {
UsedList = getUsedList(M); UsedList = getUsedList(M);
bool Changed = superAlignLDSGlobals(M);
bool Changed = processUsedLDS(M); Changed |= processUsedLDS(M);
for (Function &F : M.functions()) { for (Function &F : M.functions()) {
if (F.isDeclaration()) if (F.isDeclaration())
@ -182,25 +182,27 @@ public:
} }
private: private:
bool processUsedLDS(Module &M, Function *F = nullptr) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
// Find variables to move into new struct instance
std::vector<GlobalVariable *> FoundLocalVars =
AMDGPU::findVariablesToLower(M, F);
if (FoundLocalVars.empty()) {
// No variables to rewrite, no changes made.
return false;
}
// Increase the alignment of LDS globals if necessary to maximise the chance // Increase the alignment of LDS globals if necessary to maximise the chance
// that we can use aligned LDS instructions to access them. // that we can use aligned LDS instructions to access them.
if (SuperAlignLDSGlobals) { static bool superAlignLDSGlobals(Module &M) {
for (auto *GV : FoundLocalVars) { const DataLayout &DL = M.getDataLayout();
Align Alignment = AMDGPU::getAlign(DL, GV); bool Changed = false;
TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType()); if (!SuperAlignLDSGlobals) {
return Changed;
}
for (auto &GV : M.globals()) {
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
// Only changing alignment of LDS variables
continue;
}
if (!GV.hasInitializer()) {
// cuda/hip extern __shared__ variable, leave alignment alone
continue;
}
Align Alignment = AMDGPU::getAlign(DL, &GV);
TypeSize GVSize = DL.getTypeAllocSize(GV.getValueType());
if (GVSize > 8) { if (GVSize > 8) {
// We might want to use a b96 or b128 load/store // We might want to use a b96 or b128 load/store
@ -216,9 +218,26 @@ private:
Alignment = std::max(Alignment, Align(2)); Alignment = std::max(Alignment, Align(2));
} }
GV->setAlignment(Alignment); if (Alignment != AMDGPU::getAlign(DL, &GV)) {
Changed = true;
GV.setAlignment(Alignment);
} }
} }
return Changed;
}
bool processUsedLDS(Module &M, Function *F = nullptr) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
// Find variables to move into new struct instance
std::vector<GlobalVariable *> FoundLocalVars =
AMDGPU::findVariablesToLower(M, F);
if (FoundLocalVars.empty()) {
// No variables to rewrite, no changes made.
return false;
}
SmallVector<OptimizedStructLayoutField, 8> LayoutFields; SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
LayoutFields.reserve(FoundLocalVars.size()); LayoutFields.reserve(FoundLocalVars.size());

View File

@ -1,5 +1,5 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=true < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_ON %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-super-align-lds-globals=false < %s | FileCheck --check-prefixes=CHECK,SUPER-ALIGN_OFF %s
@ -8,6 +8,10 @@
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] } ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] } ; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
; SUPER-ALIGN_ON: @lds.unused = addrspace(3) global i32 undef, align 4
; SUPER-ALIGN_OFF: @lds.unused = addrspace(3) global i32 undef, align 2
@lds.unused = addrspace(3) global i32 undef, align 2
; CHECK-NOT: @lds.1 ; CHECK-NOT: @lds.1
@lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1 @lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1