forked from OSchip/llvm-project
Recommit "[HIP] Change default --gpu-max-threads-per-block value to 1024"
Recommit 04abbb3a78
This commit is contained in:
parent
b9f2b3bc43
commit
187658b8a6
|
@@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function
|
|||
LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
|
||||
LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
|
||||
LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
|
||||
-LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP")
|
||||
+LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")
|
||||
|
||||
LANGOPT(SYCL , 1, 0, "SYCL")
|
||||
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
|
||||
|
|
|
@@ -9060,9 +9060,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
|
|||
assert(Max == 0 && "Max must be zero");
|
||||
} else if (IsOpenCLKernel || IsHIPKernel) {
|
||||
// By default, restrict the maximum size to a value specified by
|
||||
-// --gpu-max-threads-per-block=n or its default value.
|
||||
+// --gpu-max-threads-per-block=n or its default value for HIP.
|
||||
+const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
|
||||
+const unsigned DefaultMaxWorkGroupSize =
|
||||
+IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
|
||||
+: M.getLangOpts().GPUMaxThreadsPerBlock;
|
||||
std::string AttrVal =
|
||||
-std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
|
||||
+std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
|
||||
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
|
||||
}
|
||||
|
||||
|
|
|
@@ -39,7 +39,7 @@ __global__ void num_vgpr_64() {
|
|||
// NAMD-NOT: "amdgpu-num-vgpr"
|
||||
// NAMD-NOT: "amdgpu-num-sgpr"
|
||||
|
||||
-// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
|
||||
+// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
|
||||
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
|
||||
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
|
||||
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
|
||||
|
|
|
@@ -39,4 +39,4 @@ int main() {
|
|||
launch((void*)D.Empty());
|
||||
return 0;
|
||||
}
|
||||
-// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
|
||||
+// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
|
||||
|
|
Loading…
Reference in New Issue