Use hipGetDeviceProperties to avoid int overflows

This commit is contained in:
Richard Berger 2021-02-23 10:14:05 -05:00
parent d44af3256d
commit 446a068159
No known key found for this signature in database
GPG Key ID: A9E83994E0BA0CAB
1 changed file with 21 additions and 18 deletions

View File

@@ -41,8 +41,8 @@ struct NVDProperties {
int maxThreadsPerBlock; int maxThreadsPerBlock;
int maxThreadsDim[3]; int maxThreadsDim[3];
int maxGridSize[3]; int maxGridSize[3];
int sharedMemPerBlock; CUDA_INT_TYPE sharedMemPerBlock;
int totalConstantMemory; CUDA_INT_TYPE totalConstantMemory;
int SIMDWidth; int SIMDWidth;
int memPitch; int memPitch;
int regsPerBlock; int regsPerBlock;
@@ -362,32 +362,35 @@ UCL_Device::UCL_Device() {
CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev)); CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev));
prop.name=namecstr; prop.name=namecstr;
CU_SAFE_CALL_NS(hipDeviceTotalMem(&prop.totalGlobalMem,dev)); hipDeviceProp_t hip_prop;
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.multiProcessorCount, hipDeviceAttributeMultiprocessorCount, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, dev)); CU_SAFE_CALL_NS(hipGetDeviceProperties(&hip_prop,dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[0], hipDeviceAttributeMaxBlockDimX, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[1], hipDeviceAttributeMaxBlockDimY, dev)); prop.totalGlobalMem = hip_prop.totalGlobalMem;
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[2], hipDeviceAttributeMaxBlockDimZ, dev)); prop.multiProcessorCount = hip_prop.multiProcessorCount;
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[0], hipDeviceAttributeMaxGridDimX, dev)); prop.maxThreadsPerBlock = hip_prop.maxThreadsPerBlock;
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[1], hipDeviceAttributeMaxGridDimY, dev)); prop.maxThreadsDim[0] = hip_prop.maxThreadsDim[0];
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[2], hipDeviceAttributeMaxGridDimZ, dev)); prop.maxThreadsDim[1] = hip_prop.maxThreadsDim[1];
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.sharedMemPerBlock, hipDeviceAttributeMaxSharedMemoryPerBlock, dev)); prop.maxThreadsDim[2] = hip_prop.maxThreadsDim[2];
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.totalConstantMemory, hipDeviceAttributeTotalConstantMemory, dev)); prop.maxGridSize[0] = hip_prop.maxGridSize[0];
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.SIMDWidth, hipDeviceAttributeWarpSize, dev)); prop.maxGridSize[1] = hip_prop.maxGridSize[1];
prop.maxGridSize[2] = hip_prop.maxGridSize[2];
prop.sharedMemPerBlock = hip_prop.sharedMemPerBlock;
prop.totalConstantMemory = hip_prop.totalConstMem;
prop.SIMDWidth = hip_prop.warpSize;
prop.regsPerBlock = hip_prop.regsPerBlock;
prop.clockRate = hip_prop.clockRate;
prop.computeMode = hip_prop.computeMode;
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev)); //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.regsPerBlock, hipDeviceAttributeMaxRegistersPerBlock, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.clockRate, hipDeviceAttributeClockRate, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev)); //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
//#if CUDA_VERSION >= 2020 //#if CUDA_VERSION >= 2020
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev)); //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev)); CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev)); //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.computeMode, hipDeviceAttributeComputeMode,dev));
//#endif //#endif
//#if CUDA_VERSION >= 3010 //#if CUDA_VERSION >= 3010
CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.concurrentKernels, hipDeviceAttributeConcurrentKernels, dev)); prop.concurrentKernels = hip_prop.concurrentKernels;
//CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev)); //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
//#endif //#endif