forked from mindspore-Ecosystem/mindspore
!13884 Add RANK_TABLE_FILE for PyNative
From: @jojobugfree Reviewed-by: @kisnwang,@chujinjin Signed-off-by: @chujinjin
This commit is contained in:
commit
2dc88b2cc0
|
@ -19,6 +19,7 @@
|
|||
#include "hccl/hccl.h"
|
||||
|
||||
// Name of the environment variable that InitHccl reads first (via std::getenv)
// to obtain the HCCL config file passed to HcclCommInitClusterInfo.
constexpr auto kHcclConfigFile = "MINDSPORE_HCCL_CONFIG_PATH";
|
||||
// Fallback environment variable name: InitHccl consults it only when the
// variable named by kHcclConfigFile is not set.
constexpr auto kHcclConfigFileOld = "RANK_TABLE_FILE";
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -37,10 +38,25 @@ bool HcclContext::InitHccl() {
|
|||
}
|
||||
auto config_file = std::getenv(kHcclConfigFile);
|
||||
if (config_file == nullptr) {
|
||||
MS_LOG(ERROR) << "Get hccl config file failed";
|
||||
config_file = std::getenv(kHcclConfigFileOld);
|
||||
if (config_file == nullptr) {
|
||||
MS_LOG(ERROR) << "Get hccl rank table file failed. Please export MINDSPORE_HCCL_CONFIG_PATH or RANK_TABLE_FILE";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto rank_id = GetRankId();
|
||||
try {
|
||||
rank_id_ = std::stoi(rank_id);
|
||||
} catch (std::invalid_argument &e) {
|
||||
MS_LOG(ERROR) << "Invalid rankd id env:" << rank_id;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (rank_id_ < 0 || rank_id_ > 7) {
|
||||
MS_LOG(ERROR) << "rank_id needs to be between 0-7";
|
||||
return false;
|
||||
}
|
||||
rank_id_ = std::stoi(GetRankId());
|
||||
|
||||
auto hccl_result = HcclCommInitClusterInfo(config_file, rank_id_, &hccl_comm_);
|
||||
if (hccl_result != HCCL_SUCCESS) {
|
||||
|
|
Loading…
Reference in New Issue