forked from mindspore-Ecosystem/mindspore
fix cuda version check
This commit is contained in:
parent
7296659f14
commit
ab2135d32c
|
@ -18,6 +18,7 @@ import sys
|
|||
import subprocess
|
||||
from pathlib import Path
|
||||
from abc import abstractmethod, ABCMeta
|
||||
import numpy as np
|
||||
from packaging import version
|
||||
from . import log as logger
|
||||
from .version import __version__
|
||||
|
@ -41,74 +42,117 @@ class EnvChecker(metaclass=ABCMeta):
|
|||
|
||||
|
||||
class GPUEnvChecker(EnvChecker):
|
||||
"""gpu environment check"""
|
||||
"""GPU environment check."""
|
||||
|
||||
def __init__(self):
    """Record the cuda locations and environment variables used by the checks."""
    # Cuda versions accepted by the version checks.
    self.version = ["10.1"]

    # Use the default cuda install location when it exists; otherwise leave
    # every location empty (custom or unknown environment).
    default_root = "/usr/local/cuda"
    has_default_root = os.path.exists(default_root)
    self.cuda_path = default_root if has_default_root else ""
    self.cuda_bin = default_root + "/bin" if has_default_root else ""
    self.cuda_lib = default_root + "/lib64" if has_default_root else ""
    self.cuda_version = default_root + "/version.txt" if has_default_root else ""

    # env
    self.path = os.environ.get("PATH")
    self.ld_lib_path = os.environ.get("LD_LIBRARY_PATH")

    # check
    self.path_check = "/cuda"
    self.ld_lib_path_check = "/cuda"
    self.v = "0"
    # The bin path lookup iterates over cuda_lib_path, so the lib path must be resolved first.
    self.cuda_lib_path = self._get_lib_path("libcu")
    self.cuda_bin_path = self._get_bin_path("cuda")
|
||||
|
||||
def check_env(self, e):
    """Run the cuda path check, then re-raise the given exception.

    Args:
        e (Exception): The exception to raise once the path check has run.

    Raises:
        Exception: Always raises `e`.
    """
    # Emit the PATH / LD_LIBRARY_PATH warnings before the error propagates.
    self._check_env()
    raise e
|
||||
|
||||
def set_env(self):
# Purpose (as far as the visible statements show): when self.cuda_bin is empty,
# call self._check_env() and return; otherwise prepend self.cuda_bin to the PATH
# environment variable if it is a directory, or raise EnvironmentError if not.
#
# NOTE(review): indentation has been lost in this listing and it appears to
# interleave removed and added lines of a diff. There are two consecutive bare
# `return` statements below; presumably one of them is the whole post-change
# body (making set_env a no-op) and the surrounding statements are the
# pre-change body. Confirm against the repository before relying on either reading.
|
||||
if not self.cuda_bin:
|
||||
self._check_env()
|
||||
return
|
||||
return
|
||||
|
||||
if Path(self.cuda_bin).is_dir():
|
||||
os.environ['PATH'] = self.cuda_bin + ":" + os.environ['PATH']
|
||||
else:
|
||||
raise EnvironmentError(
|
||||
f"No such directory: {self.cuda_bin}, please check if cuda is installed correctly.")
|
||||
def _get_bin_path(self, bin_name):
|
||||
"""Get bin path by bin name."""
|
||||
if bin_name == "cuda":
|
||||
return self._get_cuda_bin_path()
|
||||
return []
|
||||
|
||||
def _get_cuda_bin_path(self):
|
||||
"""Get cuda bin path by lib path."""
|
||||
path_list = []
|
||||
for path in self.cuda_lib_path:
|
||||
path = os.path.abspath(path.strip()+"/bin/")
|
||||
if Path(path).is_dir():
|
||||
path_list.append(path)
|
||||
return np.unique(path_list)
|
||||
|
||||
def _get_nvcc_version(self, is_set_env):
|
||||
"""Get cuda version by nvcc command."""
|
||||
nvcc_result = subprocess.run(["nvcc --version | grep release"],
|
||||
timeout=3, text=True, capture_output=True, check=False, shell=True)
|
||||
if nvcc_result.returncode:
|
||||
if not is_set_env:
|
||||
for path in self.cuda_bin_path:
|
||||
if Path(path + "/nvcc").is_file():
|
||||
os.environ['PATH'] = path + ":" + os.environ['PATH']
|
||||
return self._get_nvcc_version(True)
|
||||
return ""
|
||||
result = nvcc_result.stdout
|
||||
for line in result.split('\n'):
|
||||
if line:
|
||||
return line.strip().split("release")[1].split(",")[0].strip()
|
||||
return ""
|
||||
|
||||
def check_version(self):
# Purpose (from the visible statements): look for a version.txt under each entry
# of self.cuda_lib_path and accept the first one for which self._check_version()
# returns True; if none matches, log one of two warnings depending on whether
# self.v is still "0". Then query nvcc via self._get_nvcc_version(False) and warn
# when a non-empty result is not listed in self.version.
#
# NOTE(review): indentation has been lost in this listing and it appears to
# interleave removed and added lines of a diff. The leading guard on
# self.cuda_version (with its early return) and the trailing
# `v = self._read_version(self.cuda_version)` look like pre-change lines, while
# the docstring that follows the guard looks like the start of the post-change
# body. Confirm against the repository before relying on either reading.
|
||||
if not Path(self.cuda_version).is_file():
|
||||
logger.warning("Using custom cuda path, cuda version checking is skipped, please make sure "
|
||||
"cuda version is supported, you can reference to the installation guidelines "
|
||||
"https://www.mindspore.cn/install")
|
||||
return
|
||||
"""Check cuda version."""
|
||||
version_match = False
|
||||
for path in self.cuda_lib_path:
|
||||
version_file = path + "/version.txt"
|
||||
if not Path(version_file).is_file():
|
||||
continue
|
||||
if self._check_version(version_file):
|
||||
version_match = True
|
||||
break
|
||||
if not version_match:
|
||||
# self.v is initialised to "0" in __init__; presumably it is updated when a
# version file is read — TODO confirm where.
if self.v == "0":
|
||||
logger.warning("Cuda version file version.txt is not found, please confirm that the correct "
|
||||
"cuda version has been installed, you can refer to the "
|
||||
"installation guidelines: https://www.mindspore.cn/install")
|
||||
else:
|
||||
logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
|
||||
"please refer to the installation guide for version matching "
|
||||
"information: https://www.mindspore.cn/install")
|
||||
# NOTE(review): whether the nvcc check below is nested under
# `if not version_match:` cannot be determined from this listing.
nvcc_version = self._get_nvcc_version(False)
|
||||
if nvcc_version and (nvcc_version not in self.version):
|
||||
logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
|
||||
"does not match, please refer to the installation guide for version matching "
|
||||
"information: https://www.mindspore.cn/install")
|
||||
|
||||
v = self._read_version(self.cuda_version)
|
||||
def _check_version(self, version_file):
    """Check the cuda version recorded in a version.txt file.

    Args:
        version_file (str): Path of the version file, passed to `_read_version`.

    Returns:
        bool: True when the "major.minor" version is listed in
        `self.version`; otherwise a warning is logged and False is returned.
    """
    parsed = version.parse(self._read_version(version_file))
    v_str = f"{parsed.major}.{parsed.minor}"
    if v_str in self.version:
        return True
    logger.warning(f"MindSpore version {__version__} and cuda version {v_str} does not match, "
                   "reference to the match info on: https://www.mindspore.cn/install")
    return False
|
||||
|
||||
def _check_env(self):
|
||||
"""gpu cuda path check"""
|
||||
if self.path is None or self.path_check not in self.path:
|
||||
logger.warning("Can not find nvcc compiler(need by mindspore-gpu), please check if you have set env "
|
||||
"PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")
|
||||
|
||||
if self.ld_lib_path is None or self.ld_lib_path_check not in self.ld_lib_path:
|
||||
logger.warning("Can not find cuda so(need by mindspore-gpu), please check if you have set env "
|
||||
"LD_LIBRARY_PATH, you can reference to the installation guidelines "
|
||||
"https://www.mindspore.cn/install")
|
||||
def _get_lib_path(self, lib_name):
    """Get gpu lib path by ldd command.

    Runs ``ldd`` on the ``_c_expression*.so*`` files located next to this
    module and inspects the output lines that mention `lib_name`.

    Args:
        lib_name (str): Library name fragment to look for, e.g. "libcu".

    Returns:
        An empty list when no matching dependency line is found (a warning
        is logged); otherwise a `numpy.ndarray` with the unique parent
        directories of the directories holding the resolved libraries.
    """
    import glob  # local import: only this method needs it

    current_path = os.path.split(os.path.realpath(__file__))[0]

    # Expand the wildcard here and hand ldd an argument list. Building a shell
    # command string from the install path breaks as soon as the path holds
    # spaces or shell metacharacters.
    so_files = sorted(glob.glob(os.path.join(glob.escape(current_path), "_c_expression*.so*")))
    matched_lines = []
    if so_files:
        try:
            ldd_result = subprocess.run(["ldd"] + so_files,
                                        timeout=3, text=True, capture_output=True, check=False)
        except (OSError, subprocess.TimeoutExpired):
            # ldd missing or hanging is reported as "not found" below rather
            # than raised out of an environment check.
            ldd_result = None
        if ldd_result is not None:
            matched_lines = [line for line in ldd_result.stdout.split('\n') if lib_name in line]

    if not matched_lines:
        logger.warning(f"{lib_name} so(need by mindspore-gpu) is not found, please confirm that "
                       f"_c_expression.so depend on {lib_name}, "
                       f"and _c_expression.so in directory:{current_path}")
        return []

    path_list = []
    for line in matched_lines:
        # ldd prints "name => /resolved/path (address)" or "name => not found".
        resolved = line.partition("=>")[2]
        if "not found" in resolved.lower():
            logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
                           "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
                           "installation guidelines: https://www.mindspore.cn/install")
            continue
        # Keep the directory part in front of the library file name.
        lib_dir = resolved.partition(lib_name)[0]
        if lib_dir:
            # lib_dir ends with "/", so appending "../" yields its parent directory.
            path_list.append(os.path.abspath(lib_dir.strip() + "../"))
    return np.unique(path_list)
|
||||
|
||||
def _read_version(self, file_path):
|
||||
"""get gpu version info"""
|
||||
"""Get gpu version info in version.txt."""
|
||||
with open(file_path, 'r') as f:
|
||||
all_info = f.readlines()
|
||||
for line in all_info:
|
||||
|
|
Loading…
Reference in New Issue