From ab2135d32c2b089c5e720113d84b2485c8455b96 Mon Sep 17 00:00:00 2001 From: gaoyong10 Date: Tue, 9 Feb 2021 09:42:49 +0800 Subject: [PATCH] fix cuda version check --- mindspore/_check_version.py | 132 ++++++++++++++++++++++++------------ 1 file changed, 88 insertions(+), 44 deletions(-) diff --git a/mindspore/_check_version.py b/mindspore/_check_version.py index e70eeed4841..6fedea2cd3c 100644 --- a/mindspore/_check_version.py +++ b/mindspore/_check_version.py @@ -18,6 +18,7 @@ import sys import subprocess from pathlib import Path from abc import abstractmethod, ABCMeta +import numpy as np from packaging import version from . import log as logger from .version import __version__ @@ -41,74 +42,117 @@ class EnvChecker(metaclass=ABCMeta): class GPUEnvChecker(EnvChecker): - """gpu environment check""" + """GPU environment check.""" def __init__(self): self.version = ["10.1"] - self.cuda_path = "/usr/local/cuda" - if os.path.exists(self.cuda_path): - # cuda default path - self.cuda_bin = self.cuda_path + "/bin" - self.cuda_lib = self.cuda_path + "/lib64" - self.cuda_version = self.cuda_path + "/version.txt" - else: - # custom or unknown environment - self.cuda_path = "" - self.cuda_bin = "" - self.cuda_lib = "" - self.cuda_version = "" - # env self.path = os.getenv("PATH") self.ld_lib_path = os.getenv("LD_LIBRARY_PATH") # check - self.path_check = "/cuda" - self.ld_lib_path_check = "/cuda" self.v = "0" + self.cuda_lib_path = self._get_lib_path("libcu") + self.cuda_bin_path = self._get_bin_path("cuda") def check_env(self, e): - self._check_env() raise e def set_env(self): - if not self.cuda_bin: - self._check_env() - return + return - if Path(self.cuda_bin).is_dir(): - os.environ['PATH'] = self.cuda_bin + ":" + os.environ['PATH'] - else: - raise EnvironmentError( - f"No such directory: {self.cuda_bin}, please check if cuda is installed correctly.") + def _get_bin_path(self, bin_name): + """Get bin path by bin name.""" + if bin_name == "cuda": + return self._get_cuda_bin_path() + return [] + + def _get_cuda_bin_path(self): + """Get cuda bin path by lib path.""" + path_list = [] + for path in self.cuda_lib_path: + path = os.path.abspath(path.strip()+"/bin/") + if Path(path).is_dir(): + path_list.append(path) + return np.unique(path_list) + + def _get_nvcc_version(self, is_set_env): + """Get cuda version by nvcc command.""" + nvcc_result = subprocess.run(["nvcc --version | grep release"], + timeout=3, text=True, capture_output=True, check=False, shell=True) + if nvcc_result.returncode: + if not is_set_env: + for path in self.cuda_bin_path: + if Path(path + "/nvcc").is_file(): + os.environ['PATH'] = path + ":" + os.environ['PATH'] + return self._get_nvcc_version(True) + return "" + result = nvcc_result.stdout + for line in result.split('\n'): + if line: + return line.strip().split("release")[1].split(",")[0].strip() + return "" def check_version(self): - if not Path(self.cuda_version).is_file(): - logger.warning("Using custom cuda path, cuda version checking is skipped, please make sure " - "cuda version is supported, you can reference to the installation guidelines " - "https://www.mindspore.cn/install") - return + """Check cuda version.""" + version_match = False + for path in self.cuda_lib_path: + version_file = path + "/version.txt" + if not Path(version_file).is_file(): + continue + if self._check_version(version_file): + version_match = True + break + if not version_match: + if self.v == "0": + logger.warning("Cuda version file version.txt is not found, please confirm that the correct " + "cuda version has been installed, you can refer to the " + "installation guidelines: https://www.mindspore.cn/install") + else: + logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, " + "please refer to the installation guide for version matching " + "information: https://www.mindspore.cn/install") + nvcc_version = self._get_nvcc_version(False) + if nvcc_version and (nvcc_version not in self.version): + logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} " + "does not match, please refer to the installation guide for version matching " + "information: https://www.mindspore.cn/install") - v = self._read_version(self.cuda_version) + def _check_version(self, version_file): + """Check cuda version by version.txt.""" + v = self._read_version(version_file) v = version.parse(v) v_str = str(v.major) + "." + str(v.minor) if v_str not in self.version: - logger.warning(f"MindSpore version {__version__} and cuda version {v_str} does not match, " - "reference to the match info on: https://www.mindspore.cn/install") + return False + return True - def _check_env(self): - """gpu cuda path check""" - if self.path is None or self.path_check not in self.path: - logger.warning("Can not find nvcc compiler(need by mindspore-gpu), please check if you have set env " - "PATH, you can reference to the installation guidelines https://www.mindspore.cn/install") - - if self.ld_lib_path is None or self.ld_lib_path_check not in self.ld_lib_path: - logger.warning("Can not find cuda so(need by mindspore-gpu), please check if you have set env " - "LD_LIBRARY_PATH, you can reference to the installation guidelines " - "https://www.mindspore.cn/install") + def _get_lib_path(self, lib_name): + """Get gpu lib path by ldd command.""" + path_list = [] + current_path = os.path.split(os.path.realpath(__file__))[0] + ldd_result = subprocess.run(["ldd " + current_path + "/_c_expression*.so* | grep " + lib_name], + timeout=3, text=True, capture_output=True, check=False, shell=True) + if ldd_result.returncode: + logger.warning(f"{lib_name} so(need by mndspore-gpu) is not found, please confirm that " + f"_c_experssion.so depend on {lib_name}, " + f"and _c_expression.so in directory:{current_path}") + return path_list + result = ldd_result.stdout + for i in result.split('\n'): + path = i.partition("=>")[2] + if path.lower().find("not found") > 0: + logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm " + "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the " + "installation guidelines: https://www.mindspore.cn/install") + continue + path = path.partition(lib_name)[0] + if path: + path_list.append(os.path.abspath(path.strip() + "../")) + return np.unique(path_list) def _read_version(self, file_path): - """get gpu version info""" + """Get gpu version info in version.txt.""" with open(file_path, 'r') as f: all_info = f.readlines() for line in all_info: