From fe3f9bbb0e0c2e6745870c74d8eb489400afd1cd Mon Sep 17 00:00:00 2001
From: VectorSL <shiliang10@huawei.com>
Date: Fri, 8 Jul 2022 17:34:48 +0800
Subject: [PATCH] upgrade cuda

---
 mindspore/ccsrc/CMakeLists.txt                | 80 ++++++++++++++++++-
 .../mindspore/run_check/_check_version.py     |  2 +-
 scripts/build/default_options.sh              |  3 +-
 scripts/build/option_proc_mindspore.sh        | 14 +++-
 scripts/build/parse_device.sh                 |  4 +-
 scripts/build/process_options.sh              |  6 +-
 scripts/build/usage.sh                        |  6 +-
 7 files changed, 104 insertions(+), 11 deletions(-)
diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index d47e0f6b6be..c26e23daf79 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -21,6 +21,80 @@ if(ENABLE_D OR ENABLE_ACL)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
 endif()
 
+## Function for setting NVCC flag.
+# CUDA_NVCC_FLAGS is the NAME of the caller's variable; it is overwritten with the chosen flags.
+# A probe run at configure time lists device archs; env CUDA_ARCH=common picks a fixed arch list,
+# any other value builds only the detected archs; a failed probe falls back to -arch=sm_53.
+function(set_nvcc_flag CUDA_NVCC_FLAGS)
+    # Detect gpu archs by cudaGetDeviceProperties; the probe and its scratch files stay in the build tree.
+    message("Detect gpu arch on this device.")
+    set(probe_dir "${CMAKE_CURRENT_BINARY_DIR}/detect_cuda_arch")
+    set(cu_file "${probe_dir}/get_device_compute_capabilities.cu")
+    file(WRITE ${cu_file} ""
+        "#include <cuda_runtime.h>\n"
+        "#include <cstdio>\n"
+        "int main () {\n"
+        " int dev_num = 0;\n"
+        " if (cudaGetDeviceCount(&dev_num) != cudaSuccess) return -1;\n"
+        " if (dev_num < 1) return -1;\n"
+        " for (int dev_id = 0; dev_id < dev_num; ++dev_id) {\n"
+        "    cudaDeviceProp prop;"
+        "    if (cudaGetDeviceProperties(&prop, dev_id) == cudaSuccess) {\n"
+        "      printf(\"%d.%d \", prop.major, prop.minor);\n"
+        "    }\n"
+        "  }\n"
+        "  return 0;\n"
+        "}\n")
+    # Build and run cu_file, get the result from properties.
+    try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${probe_dir} ${cu_file}
+            CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
+            LINK_LIBRARIES ${CUDA_PATH}/lib64/libcudart.so
+            RUN_OUTPUT_VARIABLE compute_cap)
+    # Collect flags in a local list; the parameter itself only carries the output variable name.
+    set(nvcc_flags)
+    set(cuda_archs_bin)
+    if(RUN_RESULT_VAR EQUAL 0)
+        string(REGEX REPLACE "[ \t]+" ";" compute_cap "${compute_cap}")
+        list(REMOVE_DUPLICATES compute_cap)
+        foreach(arch ${compute_cap})
+            # The probe prints "<major>.<minor>"; allow multi-digit parts (e.g. 10.0).
+            if(NOT arch MATCHES "^[0-9]+\\.[0-9]+$")
+                message(FATAL_ERROR "Unknown CUDA arch Name ${arch} !")
+            endif()
+            list(APPEND cuda_archs_bin ${arch})
+        endforeach()
+        # Get build flag from env to choose common/auto build.
+        if("$ENV{CUDA_ARCH}" STREQUAL "common")
+            foreach(arch ${cuda_archs_bin})
+                if(arch VERSION_LESS "7.0") # For common build, we need a Volta arch at least.
+                    message(FATAL_ERROR "The device arch must >= 7.0 to build common archs. But got " ${arch}
+                            " Using -G auto is recommended to detect device arch automatically.")
+                endif()
+            endforeach()
+            message("Build common archs for release.")
+            list(APPEND nvcc_flags -gencode=arch=compute_53,code=sm_53
+                                   -gencode=arch=compute_60,code=sm_60
+                                   -gencode=arch=compute_62,code=sm_62
+                                   -gencode=arch=compute_70,code=sm_70
+                                   -gencode=arch=compute_72,code=sm_72
+                                   -gencode=arch=compute_75,code=compute_75
+                                   --expt-relaxed-constexpr)
+        else()
+            message("Auto build for arch(s) " ${cuda_archs_bin})
+            string(REGEX REPLACE "\\." "" cuda_archs_bin "${cuda_archs_bin}")
+            string(REGEX MATCHALL "[0-9()]+" cuda_archs_bin "${cuda_archs_bin}")
+            foreach(arch ${cuda_archs_bin})
+                list(APPEND nvcc_flags -gencode=arch=compute_${arch},code=sm_${arch})
+            endforeach()
+            list(APPEND nvcc_flags --expt-relaxed-constexpr)
+        endif()
+    else()
+        message("Failed to detect gpu arch automatically, build a base arch 5.3.")
+        list(APPEND nvcc_flags -arch=sm_53 --expt-relaxed-constexpr)
+    endif()
+    set(${CUDA_NVCC_FLAGS} ${nvcc_flags} PARENT_SCOPE)
+endfunction()
+
 if(ENABLE_GPU)
     find_package(CUDA REQUIRED)
     find_package(Threads)
@@ -83,8 +157,10 @@ if(ENABLE_GPU)
     message("CUBLAS_LIBRARY_PATH: ${CUBLAS_LIBRARY_PATH}")
     message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}")
     include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
-
-    list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr)
+    ## set NVCC ARCH FLAG
+    set(CUDA_NVCC_FLAGS)
+    set_nvcc_flag(CUDA_NVCC_FLAGS)
+    add_definitions(-Wno-unknown-pragmas) # Avoid compilation warnings from cuda/thrust
     if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
         list(APPEND CUDA_NVCC_FLAGS -G)
         message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS})
diff --git a/mindspore/python/mindspore/run_check/_check_version.py b/mindspore/python/mindspore/run_check/_check_version.py
index 6b2f7b31753..fc908d85027 100644
--- a/mindspore/python/mindspore/run_check/_check_version.py
+++ b/mindspore/python/mindspore/run_check/_check_version.py
@@ -46,7 +46,7 @@ class GPUEnvChecker(EnvChecker):
     """GPU environment check."""
 
     def __init__(self):
-        self.version = ["10.1", "11.1"]
+        self.version = ["10.1", "11.1", "11.6"]
         self.lib_key_to_lib_name = {'libcu': 'libcuda.so'}
         # env
         self.path = os.getenv("PATH")
diff --git a/scripts/build/default_options.sh b/scripts/build/default_options.sh
index 1ed14171b25..14cbc689746 100755
--- a/scripts/build/default_options.sh
+++ b/scripts/build/default_options.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -63,4 +63,5 @@ init_default_options()
   export USER_ENABLE_DEBUGGER=false
   export ENABLE_SYM_FILE="off"
   export ENABLE_FAST_HASH_TABLE="on"
+  export CUDA_ARCH="auto"
 }
diff --git a/scripts/build/option_proc_mindspore.sh b/scripts/build/option_proc_mindspore.sh
index 3381e6cf23e..cca2d1228eb 100755
--- a/scripts/build/option_proc_mindspore.sh
+++ b/scripts/build/option_proc_mindspore.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -83,3 +83,15 @@ build_option_proc_z()
     export COMPILE_MINDDATA="off"
   fi
 }
+
+build_option_proc_upper_g()
+{
+  # Handler for -G: accept only 'common' or 'auto'; anything else prints usage and exits 1.
+  case "$OPTARG" in
+    common|auto)
+      export CUDA_ARCH=$OPTARG ;;
+    *)
+      echo "Invalid value $OPTARG for option -G"; usage; exit 1 ;;
+  esac
+  echo "build gpu for arch $OPTARG"
+}
diff --git a/scripts/build/parse_device.sh b/scripts/build/parse_device.sh
index 4be3cce4c56..7b44af7be9e 100755
--- a/scripts/build/parse_device.sh
+++ b/scripts/build/parse_device.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ parse_device()
     if [[ "X$DEVICE_VERSION" == "X" ]]; then
       DEVICE_VERSION=10.1
     fi
-    if [[ "X$DEVICE_VERSION" != "X11.1" && "X$DEVICE_VERSION" != "X10.1" ]]; then
+    if [[ "X$DEVICE_VERSION" != "X11.6" && "X$DEVICE_VERSION" != "X11.1" && "X$DEVICE_VERSION" != "X10.1" ]]; then
       echo "Invalid value ${DEVICE_VERSION} for option -V"
       usage
       exit 1
diff --git a/scripts/build/process_options.sh b/scripts/build/process_options.sh
index 38db3b6dda7..e616768ee39 100755
--- a/scripts/build/process_options.sh
+++ b/scripts/build/process_options.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ set -e
 process_options()
 {
   # Process the options
-  while getopts 'drvj:c:t:hb:s:a:g:p:ie:l:I:RP:D:zM:V:K:B:En:A:S:k:W:F:H:L:y' opt
+  while getopts 'drvj:c:t:hb:s:a:g:p:ie:l:I:RP:D:zM:V:K:B:En:A:S:k:W:F:H:L:yG:' opt
   do
     CASE_SENSIVE_ARG=${OPTARG}
     OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
@@ -105,6 +105,8 @@ process_options()
         export ENABLE_TRT="on"
         export TENSORRT_HOME="$CASE_SENSIVE_ARG"
         echo "Link Tensor-RT library. Path: ${CASE_SENSIVE_ARG}" ;;
+      G)
+        build_option_proc_upper_g ;;
       *)
         echo "Unknown option ${opt}!"
         usage
diff --git a/scripts/build/usage.sh b/scripts/build/usage.sh
index 8b844c93ae1..53a022ecde4 100755
--- a/scripts/build/usage.sh
+++ b/scripts/build/usage.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ usage()
   echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K on|off] \\"
   echo "              [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\"
   echo "              [-A on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|avx512|off] \\"
-  echo "              [-L Tensor-RT path] [-y on|off] [-F on|off] \\"
+  echo "              [-L Tensor-RT path] [-y on|off] [-F on|off] [-G common|auto]\\"
   echo ""
   echo "Options:"
   echo "    -d Debug mode"
@@ -63,4 +63,6 @@ usage()
   echo "    -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
   echo "    -y Compile the symbol table switch and save the symbol table to the directory output"
   echo "    -F Use fast hash table in mindspore compiler, default on"
+  echo "    -G Select an architecture to build, set 'common' to build with common architectures(eg. gpu: 5.3, 6.0, 6.2, 7.0, 7.2, 7.5),\\"
+  echo "       set auto to detect automatically, default: 'auto'. Only effective for GPU currently."
 }