!38002 GPU optimize build

Merge pull request !38002 from VectorSL/upgrade-cuda
This commit is contained in:
i-robot 2022-08-16 11:05:24 +00:00 committed by Gitee
commit 766ba92cf7
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
7 changed files with 104 additions and 11 deletions

View File

@ -21,6 +21,80 @@ if(ENABLE_D OR ENABLE_ACL)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling) set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
endif() endif()
## Function for setting NVCC flag
#
# Usage: set_nvcc_flag(<out-var>)
#
# Picks the nvcc code-generation flags for this build and stores them, as a list, in the
# caller's variable named <out-var>. Any previous value of that variable is replaced.
#
# How the flags are chosen:
#   1. A small CUDA program is written, built and run with try_run(); it prints the compute
#      capability ("major.minor") of every device cudaGetDeviceProperties() can query.
#   2. If at least one capability was detected:
#        - environment variable CUDA_ARCH == "common": every detected device must be >= 7.0
#          (otherwise configuration aborts) and a fixed list of -gencode flags is used;
#        - any other value: one -gencode flag per detected capability.
#   3. If nothing could be detected (probe failed to compile or run, or printed nothing) the
#      base arch sm_53 is used, whatever CUDA_ARCH says.
#
# Reads CUDA_INCLUDE_DIRS and CUDA_PATH from the calling scope to build the probe.
# RUN_RESULT_VAR and COMPILE_RESULT_VAR are the result variables handed to try_run().
function(set_nvcc_flag CUDA_NVCC_FLAGS)
    # The formal parameter holds the NAME of the caller's variable, not its value. Remember that
    # name and collect the flags in a separate local list, so the final set(... PARENT_SCOPE)
    # does not depend on list-splitting the parameter itself.
    set(result_var "${CUDA_NVCC_FLAGS}")
    set(nvcc_flags)

    # Detect gpu archs by cudaGetDeviceProperties.
    message("Detect gpu arch on this device.")
    # NOTE(review): this scratch location assumes the build tree is <source>/build/mindspore;
    # confirm before pointing it at another directory.
    set(probe_dir "${CMAKE_SOURCE_DIR}/build/mindspore/ccsrc")
    set(cu_file "${probe_dir}/get_device_compute_capabilities.cu")
    file(WRITE ${cu_file} ""
        "#include <cuda_runtime.h>\n"
        "#include <cstdio>\n"
        "int main () {\n"
        "  int dev_num = 0;\n"
        "  if (cudaGetDeviceCount(&dev_num) != cudaSuccess) return -1;\n"
        "  if (dev_num < 1) return -1;\n"
        "  for (int dev_id = 0; dev_id < dev_num; ++dev_id) {\n"
        "    cudaDeviceProp prop;\n"
        "    if (cudaGetDeviceProperties(&prop, dev_id) == cudaSuccess) {\n"
        "      printf(\"%d.%d \", prop.major, prop.minor);\n"
        "    }\n"
        "  }\n"
        "  return 0;\n"
        "}\n")

    # Build and run cu_file, get the result from properties.
    try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${probe_dir}/ ${cu_file}
            CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
            LINK_LIBRARIES ${CUDA_PATH}/lib64/libcudart.so
            RUN_OUTPUT_VARIABLE compute_cap)

    # Turn the probe output ("7.0 7.5 ") into a de-duplicated list of "major.minor" entries.
    # The compile result is checked too: when compilation fails try_run() does not produce a
    # fresh run result, so RUN_RESULT_VAR alone cannot be trusted.
    set(cuda_archs_bin)
    if(COMPILE_RESULT_VAR AND RUN_RESULT_VAR EQUAL 0)
        string(REGEX REPLACE "[ \t]+" ";" compute_cap "${compute_cap}")
        list(REMOVE_DUPLICATES compute_cap)
        foreach(arch ${compute_cap})
            # Accept multi-digit components as well (e.g. "10.0"), not only "N.N".
            if(NOT "${arch}" MATCHES "^[0-9]+\\.[0-9]+$")
                message(FATAL_ERROR "Unknown CUDA arch Name ${arch} !")
            endif()
            list(APPEND cuda_archs_bin ${arch})
        endforeach()
    endif()

    list(LENGTH cuda_archs_bin detected_arch_count)
    if(detected_arch_count GREATER 0)
        # Get build flag from env to choose common/auto build.
        set(arch_mode "$ENV{CUDA_ARCH}")
        if("${arch_mode}" STREQUAL "common")
            foreach(arch ${cuda_archs_bin})
                if(arch VERSION_LESS "7.0") # For common build, we need a Volta arch at least.
                    message(FATAL_ERROR "The device arch must >= 7.0 to build common archs. But got " ${arch}
                                        " Using -G auto is recommended to detect device arch automatically.")
                endif()
            endforeach()
            message("Build common archs for release.")
            list(APPEND nvcc_flags -gencode=arch=compute_53,code=sm_53
                                   -gencode=arch=compute_60,code=sm_60
                                   -gencode=arch=compute_62,code=sm_62
                                   -gencode=arch=compute_70,code=sm_70
                                   -gencode=arch=compute_72,code=sm_72
                                   -gencode=arch=compute_75,code=compute_75
                                   --expt-relaxed-constexpr)
        else()
            # Print the archs space-separated; passing the list unquoted to message() would glue
            # the entries together ("7.07.5").
            string(REPLACE ";" " " arch_display "${cuda_archs_bin}")
            message("Auto build for arch(s) ${arch_display}")
            foreach(arch ${cuda_archs_bin})
                # "7.5" -> "75", used as compute_75 / sm_75.
                string(REPLACE "." "" arch_digits "${arch}")
                list(APPEND nvcc_flags -gencode=arch=compute_${arch_digits},code=sm_${arch_digits})
            endforeach()
            list(APPEND nvcc_flags --expt-relaxed-constexpr)
        endif()
    else()
        message("Failed to detect gpu arch automatically, build a base arch 5.3.")
        list(APPEND nvcc_flags -arch=sm_53 --expt-relaxed-constexpr)
    endif()

    set(${result_var} ${nvcc_flags} PARENT_SCOPE)
endfunction()
if(ENABLE_GPU) if(ENABLE_GPU)
find_package(CUDA REQUIRED) find_package(CUDA REQUIRED)
find_package(Threads) find_package(Threads)
@ -83,8 +157,10 @@ if(ENABLE_GPU)
message("CUBLAS_LIBRARY_PATH: ${CUBLAS_LIBRARY_PATH}") message("CUBLAS_LIBRARY_PATH: ${CUBLAS_LIBRARY_PATH}")
message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}") message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}")
include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS}) include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
## set NVCC ARCH FLAG
list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr) set(CUDA_NVCC_FLAGS)
set_nvcc_flag(CUDA_NVCC_FLAGS)
add_definitions(-Wno-unknown-pragmas) # Avoid compilation warnings from cuda/thrust
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
list(APPEND CUDA_NVCC_FLAGS -G) list(APPEND CUDA_NVCC_FLAGS -G)
message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS}) message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS})

View File

@ -46,7 +46,7 @@ class GPUEnvChecker(EnvChecker):
"""GPU environment check.""" """GPU environment check."""
def __init__(self): def __init__(self):
self.version = ["10.1", "11.1"] self.version = ["10.1", "11.1", "11.6"]
self.lib_key_to_lib_name = {'libcu': 'libcuda.so'} self.lib_key_to_lib_name = {'libcu': 'libcuda.so'}
# env # env
self.path = os.getenv("PATH") self.path = os.getenv("PATH")

View File

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd # Copyright 2021-2022 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -63,4 +63,5 @@ init_default_options()
export USER_ENABLE_DEBUGGER=false export USER_ENABLE_DEBUGGER=false
export ENABLE_SYM_FILE="off" export ENABLE_SYM_FILE="off"
export ENABLE_FAST_HASH_TABLE="on" export ENABLE_FAST_HASH_TABLE="on"
export CUDA_ARCH="auto"
} }

View File

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd # Copyright 2021-2022 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -83,3 +83,15 @@ build_option_proc_z()
export COMPILE_MINDDATA="off" export COMPILE_MINDDATA="off"
fi fi
} }
# Handle the -G option: validate $OPTARG and export it as CUDA_ARCH.
# Accepted values are "common" and "auto"; any other value prints an error,
# shows the usage text and exits with status 1.
build_option_proc_upper_g()
{
  case "X$OPTARG" in
    Xcommon|Xauto)
      export CUDA_ARCH="$OPTARG"
      ;;
    *)
      echo "Invalid value $OPTARG for option -G"
      usage
      exit 1
      ;;
  esac
  echo "build gpu for arch $OPTARG"
}

View File

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd # Copyright 2021-2022 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -34,7 +34,7 @@ parse_device()
if [[ "X$DEVICE_VERSION" == "X" ]]; then if [[ "X$DEVICE_VERSION" == "X" ]]; then
DEVICE_VERSION=10.1 DEVICE_VERSION=10.1
fi fi
if [[ "X$DEVICE_VERSION" != "X11.1" && "X$DEVICE_VERSION" != "X10.1" ]]; then if [[ "X$DEVICE_VERSION" != "X11.6" && "X$DEVICE_VERSION" != "X11.1" && "X$DEVICE_VERSION" != "X10.1" ]]; then
echo "Invalid value ${DEVICE_VERSION} for option -V" echo "Invalid value ${DEVICE_VERSION} for option -V"
usage usage
exit 1 exit 1

View File

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd # Copyright 2021-2022 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -20,7 +20,7 @@ set -e
process_options() process_options()
{ {
# Process the options # Process the options
while getopts 'drvj:c:t:hb:s:a:g:p:ie:l:I:RP:D:zM:V:K:B:En:A:S:k:W:F:H:L:y' opt while getopts 'drvj:c:t:hb:s:a:g:p:ie:l:I:RP:D:zM:V:K:B:En:A:S:k:W:F:H:L:yG:' opt
do do
CASE_SENSIVE_ARG=${OPTARG} CASE_SENSIVE_ARG=${OPTARG}
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
@ -105,6 +105,8 @@ process_options()
export ENABLE_TRT="on" export ENABLE_TRT="on"
export TENSORRT_HOME="$CASE_SENSIVE_ARG" export TENSORRT_HOME="$CASE_SENSIVE_ARG"
echo "Link Tensor-RT library. Path: ${CASE_SENSIVE_ARG}" ;; echo "Link Tensor-RT library. Path: ${CASE_SENSIVE_ARG}" ;;
G)
build_option_proc_upper_g ;;
*) *)
echo "Unknown option ${opt}!" echo "Unknown option ${opt}!"
usage usage

View File

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd # Copyright 2021-2022 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -24,7 +24,7 @@ usage()
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K on|off] \\" echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K on|off] \\"
echo " [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\" echo " [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\"
echo " [-A on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|avx512|off] \\" echo " [-A on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|avx512|off] \\"
echo " [-L Tensor-RT path] [-y on|off] [-F on|off] \\" echo " [-L Tensor-RT path] [-y on|off] [-F on|off] [-G common|auto]\\"
echo "" echo ""
echo "Options:" echo "Options:"
echo " -d Debug mode" echo " -d Debug mode"
@ -63,4 +63,6 @@ usage()
echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking" echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
echo " -y Compile the symbol table switch and save the symbol table to the directory output" echo " -y Compile the symbol table switch and save the symbol table to the directory output"
echo " -F Use fast hash table in mindspore compiler, default on" echo " -F Use fast hash table in mindspore compiler, default on"
echo " -G Select an architecture to build, set 'common' to build with common architectures(eg. gpu: 5.3, 6.0, 6.2, 7.0, 7.2, 7.5),\\"
echo " set auto to detect automatically, default: 'auto'. Only effective for GPU currently."
} }