!13884 Add RANK_TABLE_FILE for PyNative

From: @jojobugfree
Reviewed-by: @kisnwang,@chujinjin
Signed-off-by: @chujinjin
This commit is contained in:
mindspore-ci-bot 2021-03-24 14:53:51 +08:00 committed by Gitee
commit 2dc88b2cc0
1 changed file with 18 additions and 2 deletions

View File

@@ -19,6 +19,7 @@
#include "hccl/hccl.h"
// Name of the environment variable that InitHccl() queries first for the HCCL config (rank table) file.
constexpr const char *kHcclConfigFile = "MINDSPORE_HCCL_CONFIG_PATH";
// Name of the environment variable that InitHccl() falls back to when the one above is not set.
constexpr const char *kHcclConfigFileOld = "RANK_TABLE_FILE";
namespace mindspore {
namespace kernel {
@@ -37,10 +38,25 @@ bool HcclContext::InitHccl() {
}
auto config_file = std::getenv(kHcclConfigFile);
if (config_file == nullptr) {
MS_LOG(ERROR) << "Get hccl config file failed";
config_file = std::getenv(kHcclConfigFileOld);
if (config_file == nullptr) {
MS_LOG(ERROR) << "Get hccl rank table file failed. Please export MINDSPORE_HCCL_CONFIG_PATH or RANK_TABLE_FILE";
return false;
}
}
auto rank_id = GetRankId();
try {
rank_id_ = std::stoi(rank_id);
} catch (std::invalid_argument &e) {
MS_LOG(ERROR) << "Invalid rankd id env:" << rank_id;
return false;
}
if (rank_id_ < 0 || rank_id_ > 7) {
MS_LOG(ERROR) << "rank_id needs to be between 0-7";
return false;
}
rank_id_ = std::stoi(GetRankId());
auto hccl_result = HcclCommInitClusterInfo(config_file, rank_id_, &hccl_comm_);
if (hccl_result != HCCL_SUCCESS) {