From 53c43431bc6a01ad1e29b9351450ac18d5270ab3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Oct 2020 16:53:00 -0400 Subject: [PATCH] AMDGPU: Propagate amdgpu-flat-work-group-size attributes Fixes being overly conservative with the register counts in called functions. This should try to do a conservative range merge, but for now just clone. Also fix not being able to functionally run the pass standalone. --- .../AMDGPU/AMDGPUPropagateAttributes.cpp | 24 ++++++++-- ...opagate-attributes-flat-work-group-size.ll | 48 +++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp index 982aae374884..dcbe4270e8a9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -32,6 +32,7 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Target/TargetMachine.h" @@ -56,8 +57,10 @@ static constexpr const FeatureBitset TargetFeatures = { }; // Attributes to propagate. +// TODO: Support conservative min/max merging instead of cloning. 
static constexpr const char* AttributeNames[] = { - "amdgpu-waves-per-eu" + "amdgpu-waves-per-eu", + "amdgpu-flat-work-group-size" }; static constexpr unsigned NumAttr = @@ -371,15 +374,28 @@ AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const } bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { - if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) + if (!TM) { + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + TM = &TPC->getTM<TargetMachine>(); + } + + if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; return AMDGPUPropagateAttributes(TM, false).process(F); } bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { - if (!TM) - return false; + if (!TM) { + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + TM = &TPC->getTM<TargetMachine>(); + } return AMDGPUPropagateAttributes(TM, true).process(M); } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll new file mode 100644 index 000000000000..30c6eded2397 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll @@ -0,0 +1,48 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s + +; CHECK: define internal void @max_flat_1_1024() #0 { +define internal void @max_flat_1_1024() #0 { + ret void +} + +; CHECK: define internal void @max_flat_1_256() #1 { +define internal void @max_flat_1_256() #1 { + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_default() #1 { +define amdgpu_kernel void @kernel_1_256_call_default() #1 { + call void @default() + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_1_256() #1 { +define amdgpu_kernel void @kernel_1_256_call_1_256() #1 { + call void @max_flat_1_256() + ret void +} + +; CHECK: define amdgpu_kernel void @kernel_1_256_call_64_64() #1 { +define amdgpu_kernel void 
@kernel_1_256_call_64_64() #1 { + call void @max_flat_64_64() + ret void +} + +; CHECK: define internal void @max_flat_64_64() #2 { +define internal void @max_flat_64_64() #2 { + ret void +} + +; CHECK: define internal void @default() #2 { +define internal void @default() #3 { + ret void +} + +attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" } +attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" } +attributes #2 = { noinline "amdgpu-flat-work-group-size"="64,64" } +attributes #3 = { noinline } + +; CHECK: attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" +; CHECK-NEXT: attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" +; CHECK-NEXT: attributes #2 = { noinline "amdgpu-flat-work-group-size"="1,256"