Recommit "[HIP] Change default --gpu-max-threads-per-block value to 1024"

Recommit 04abbb3a78
This commit is contained in:
Yaxun (Sam) Liu 2020-09-28 22:39:21 -04:00
parent b9f2b3bc43
commit 187658b8a6
4 changed files with 9 additions and 5 deletions

View File

@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function
LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP")
LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")
LANGOPT(SYCL , 1, 0, "SYCL")
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")

View File

@ -9060,9 +9060,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to a value specified by
// --gpu-max-threads-per-block=n or its default value.
// --gpu-max-threads-per-block=n or its default value for HIP.
const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
const unsigned DefaultMaxWorkGroupSize =
IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
: M.getLangOpts().GPUMaxThreadsPerBlock;
std::string AttrVal =
std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}

View File

@ -39,7 +39,7 @@ __global__ void num_vgpr_64() {
// NAMD-NOT: "amdgpu-num-vgpr"
// NAMD-NOT: "amdgpu-num-sgpr"
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"

View File

@ -39,4 +39,4 @@ int main() {
launch((void*)D.Empty());
return 0;
}
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"