Use hipGetDeviceProperties to avoid int overflows

2021-02-23 10:14:05 -05:00 · 2021-02-23 10:14:05 -05:00 · 446a068159
parent d44af3256d
commit 446a068159
1 changed file with 21 additions and 18 deletions
--- a/lib/gpu/geryon/hip_device.h
+++ b/lib/gpu/geryon/hip_device.h
@@ -41,8 +41,8 @@ struct NVDProperties {
  int maxThreadsPerBlock;
  int maxThreadsDim[3];
  int maxGridSize[3];
-  int sharedMemPerBlock;
+  CUDA_INT_TYPE sharedMemPerBlock;
-  int totalConstantMemory;
+  CUDA_INT_TYPE totalConstantMemory;
  int SIMDWidth;
  int memPitch;
  int regsPerBlock;
@@ -362,32 +362,35 @@ UCL_Device::UCL_Device() {
    CU_SAFE_CALL_NS(hipDeviceGetName(namecstr,1024,dev));
    prop.name=namecstr;
-    CU_SAFE_CALL_NS(hipDeviceTotalMem(&prop.totalGlobalMem,dev));
+    hipDeviceProp_t hip_prop;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.multiProcessorCount, hipDeviceAttributeMultiprocessorCount, dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsPerBlock, hipDeviceAttributeMaxThreadsPerBlock, dev));
+    CU_SAFE_CALL_NS(hipGetDeviceProperties(&hip_prop,dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[0], hipDeviceAttributeMaxBlockDimX, dev));
+
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[1], hipDeviceAttributeMaxBlockDimY, dev));
+    prop.totalGlobalMem = hip_prop.totalGlobalMem;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxThreadsDim[2], hipDeviceAttributeMaxBlockDimZ, dev));
+    prop.multiProcessorCount = hip_prop.multiProcessorCount;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[0], hipDeviceAttributeMaxGridDimX, dev));
+    prop.maxThreadsPerBlock = hip_prop.maxThreadsPerBlock;
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[1], hipDeviceAttributeMaxGridDimY, dev));
+    prop.maxThreadsDim[0] = hip_prop.maxThreadsDim[0];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.maxGridSize[2], hipDeviceAttributeMaxGridDimZ, dev));
+    prop.maxThreadsDim[1] = hip_prop.maxThreadsDim[1];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.sharedMemPerBlock, hipDeviceAttributeMaxSharedMemoryPerBlock, dev));
+    prop.maxThreadsDim[2] = hip_prop.maxThreadsDim[2];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.totalConstantMemory, hipDeviceAttributeTotalConstantMemory, dev));
+    prop.maxGridSize[0] = hip_prop.maxGridSize[0];
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.SIMDWidth, hipDeviceAttributeWarpSize, dev));
+    prop.maxGridSize[1] = hip_prop.maxGridSize[1];
+    prop.maxGridSize[2] = hip_prop.maxGridSize[2];
+    prop.sharedMemPerBlock = hip_prop.sharedMemPerBlock;
+    prop.totalConstantMemory = hip_prop.totalConstMem;
+    prop.SIMDWidth = hip_prop.warpSize;
+    prop.regsPerBlock = hip_prop.regsPerBlock;
+    prop.clockRate = hip_prop.clockRate;
+    prop.computeMode = hip_prop.computeMode;
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.regsPerBlock, hipDeviceAttributeMaxRegistersPerBlock, dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.clockRate, hipDeviceAttributeClockRate, dev));
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
    //#if CUDA_VERSION >= 2020
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.integrated, hipDeviceAttributeIntegrated, dev));
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.computeMode, hipDeviceAttributeComputeMode,dev));
    //#endif
    //#if CUDA_VERSION >= 3010
-    CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.concurrentKernels, hipDeviceAttributeConcurrentKernels, dev));
+    prop.concurrentKernels = hip_prop.concurrentKernels;
    //CU_SAFE_CALL_NS(hipDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
    //#endif