forked from mindspore-Ecosystem/mindspore
!49684 fix: aclFinalize repeat on lite
Merge pull request !49684 from guozhijian/fix_aclFinalize_repeat_lite
This commit is contained in:
commit
1395990caa
|
@ -15,7 +15,7 @@
|
|||
...
|
||||
n15075141 999
|
||||
|
||||
- **image_dir** (str) - ImageNet数据集的目录路径,目录中包含类似n02119789、n02100735、n02110185和n02096294的子目录。
|
||||
- **image_dir** (str) - ImageNet数据集的目录路径,目录中包含类似n01440764、n01443537、n01484850和n15075141的子目录。
|
||||
- **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。
|
||||
- **partition_number** (int,可选) - 生成MindRecord的文件个数。默认值:1。
|
||||
|
||||
|
|
|
@ -39,33 +39,36 @@ aclError AclInitAdapter::AclInit(const char *config_file) {
|
|||
// Finalizes ACL unless this adapter's flag says it has already been finalized (or never initialized).
// Returns ACL_ERROR_NONE without calling aclFinalize() when init_flag_ is false; otherwise clears the
// flag and returns whatever aclFinalize() reports. flag_mutex_ is held for the whole call.
aclError AclInitAdapter::AclFinalize() {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  if (!init_flag_) {
    MS_LOG(INFO) << "Had been acl finalize.";
    return ACL_ERROR_NONE;
  }

  MS_LOG(INFO) << "Begin acl finalize.";
  // The flag is cleared before the call, so a repeated AclFinalize() is a no-op regardless of the result.
  init_flag_ = false;
  return aclFinalize();
}
|
||||
|
||||
// Calls aclFinalize() unconditionally, ignoring the current value of init_flag_.
// The flag is reset to false first; the result of aclFinalize() is returned as-is.
aclError AclInitAdapter::ForceFinalize() {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  MS_LOG(INFO) << "Begin force acl finalize.";
  init_flag_ = false;
  return aclFinalize();
}
|
||||
|
||||
AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) {
|
||||
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed.";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
MS_LOG(INFO) << "Acl init success.";
|
||||
}
|
||||
|
||||
AclEnvGuard::~AclEnvGuard() {
|
||||
errno_ = AclInitAdapter::GetInstance().AclFinalize();
|
||||
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
|
||||
MS_LOG(ERROR) << "Finalize acl failed";
|
||||
MS_LOG(ERROR) << "Finalize acl failed.";
|
||||
}
|
||||
MS_LOG(INFO) << "Acl finalize success";
|
||||
MS_LOG(INFO) << "Acl finalize success.";
|
||||
}
|
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
|
||||
|
@ -77,11 +80,11 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
|
|||
acl_env = std::make_shared<AclEnvGuard>();
|
||||
aclError ret = acl_env->GetErrno();
|
||||
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed.";
|
||||
return nullptr;
|
||||
}
|
||||
global_acl_env_ = acl_env;
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
MS_LOG(INFO) << "Acl init success.";
|
||||
}
|
||||
return acl_env;
|
||||
}
|
||||
|
|
|
@ -17,11 +17,6 @@ if(NOT MSLITE_ENABLE_ACL)
|
|||
${DVPP_UTILS_SRC}
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc
|
||||
)
|
||||
else()
|
||||
set(DVPP_UTILS_SRC
|
||||
${DVPP_UTILS_SRC}
|
||||
${CMAKE_SOURCE_DIR}/../../mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(dvpp_utils SHARED ${DVPP_UTILS_SRC})
|
||||
|
@ -30,7 +25,8 @@ enable_target_when_only_build_plugins(dvpp_utils)
|
|||
if(MSLITE_ENABLE_ACL)
|
||||
find_library(acl_dvpp libacl_dvpp.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(acl libascendcl.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
target_link_libraries(dvpp_utils PRIVATE minddata-lite ${acl} ${acl_dvpp} mindspore_core)
|
||||
# find acl_env_guard in ascend_kernel_plugin
|
||||
target_link_libraries(dvpp_utils PRIVATE ascend_kernel_plugin minddata-lite ${acl} ${acl_dvpp} mindspore_core)
|
||||
else()
|
||||
target_link_libraries(dvpp_utils PRIVATE _c_dataengine ${ACL} ${ACL_DVPP} mindspore_core mindspore_shared_lib)
|
||||
endif()
|
||||
|
|
|
@ -66,7 +66,7 @@ void ResourceManager::Release() {
|
|||
}
|
||||
|
||||
// finalize the acl when the process exit
|
||||
ret = mindspore::AclInitAdapter::GetInstance().AclFinalize();
|
||||
ret = AclInitAdapter::GetInstance().AclFinalize();
|
||||
if (ret != APP_ERR_OK) {
|
||||
MS_LOG(DEBUG) << "Failed to finalize acl, ret = " << ret << ".";
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ APP_ERROR ResourceManager::InitResource(ResourceInfo &resourceInfo) {
|
|||
return APP_ERR_OK;
|
||||
}
|
||||
APP_ERROR ret = APP_ERR_OK;
|
||||
acl_env_ = mindspore::AclEnvGuard::GetAclEnv();
|
||||
acl_env_ = AclEnvGuard::GetAclEnv();
|
||||
if (acl_env_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Failed to init acl.";
|
||||
return APP_ERR_COMM_FAILURE;
|
||||
|
|
|
@ -22,7 +22,15 @@
|
|||
#include <sys/stat.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#ifndef BUILD_LITE
|
||||
#include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h"
|
||||
using AclEnvGuard = mindspore::AclEnvGuard;
|
||||
using AclInitAdapter = mindspore::AclInitAdapter;
|
||||
#else
|
||||
#include "mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.h"
|
||||
using AclEnvGuard = mindspore::kernel::acl::AclEnvGuard;
|
||||
using AclInitAdapter = mindspore::kernel::acl::AclInitAdapter;
|
||||
#endif
|
||||
#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
|
||||
#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
|
||||
#include "minddata/dataset/kernels/image/dvpp/utils/resouce_info.h"
|
||||
|
@ -57,7 +65,7 @@ class ResourceManager {
|
|||
std::vector<int> deviceIds_;
|
||||
std::vector<aclrtContext> contexts_;
|
||||
std::unordered_map<int, int> deviceIdMap_; // Map of device to index
|
||||
std::shared_ptr<mindspore::AclEnvGuard> acl_env_;
|
||||
std::shared_ptr<AclEnvGuard> acl_env_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -23,16 +23,83 @@ namespace acl {
|
|||
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
|
||||
std::mutex AclEnvGuard::global_acl_env_mutex_;
|
||||
|
||||
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) {
|
||||
errno_ = aclInit(cfg_file.data());
|
||||
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
AclInitAdapter &AclInitAdapter::GetInstance() {
|
||||
static AclInitAdapter instance = {};
|
||||
return instance;
|
||||
}
|
||||
|
||||
AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); }
|
||||
// Initializes ACL once per adapter.
//
// config_file is forwarded to aclInit() unchanged (callers in this file pass nullptr or a config path).
// Returns ACL_ERROR_NONE immediately when the adapter already recorded an initialization; otherwise
// returns the result of aclInit().
aclError AclInitAdapter::AclInit(const char *config_file) {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  if (init_flag_) {
    return ACL_ERROR_NONE;
  }

  const aclError ret = aclInit(config_file);
  // Record the initialization only when ACL is actually up. Setting the flag before checking the
  // result would make every later call report ACL_ERROR_NONE after a failed aclInit().
  // ACL_ERROR_REPEAT_INITIALIZE still sets the flag, as the unconditional assignment did before.
  if (ret == ACL_ERROR_NONE || ret == ACL_ERROR_REPEAT_INITIALIZE) {
    init_flag_ = true;
  }
  return ret;
}
|
||||
|
||||
// Tears ACL down if this adapter currently has it marked as initialized.
// When init_flag_ is already false nothing is finalized and ACL_ERROR_NONE is returned;
// otherwise the flag is cleared and the aclFinalize() result is handed back.
aclError AclInitAdapter::AclFinalize() {
  std::lock_guard<std::mutex> guard(flag_mutex_);
  if (init_flag_) {
    MS_LOG(INFO) << "Begin acl finalize.";
    init_flag_ = false;
    return aclFinalize();
  }

  MS_LOG(INFO) << "Had been acl finalize.";
  return ACL_ERROR_NONE;
}
|
||||
|
||||
// Finalizes ACL without consulting init_flag_: the flag is reset and aclFinalize() is always called.
aclError AclInitAdapter::ForceFinalize() {
  std::lock_guard<std::mutex> guard(flag_mutex_);
  MS_LOG(INFO) << "Begin force acl finalize.";
  init_flag_ = false;
  const aclError status = aclFinalize();
  return status;
}
|
||||
|
||||
// Initializes ACL through the shared AclInitAdapter with no config file and stores the result in errno_.
// Only logs the outcome; callers inspect GetErrno() to learn whether initialization worked.
AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) {
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success.";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  }
}
|
||||
|
||||
// Initializes ACL through the shared AclInitAdapter using cfg_file as the config path and stores the
// result in errno_. Only logs the outcome; callers inspect GetErrno() for the status.
// NOTE(review): std::string_view::data() is not required to be null-terminated, while AclInit takes a
// C string - confirm every caller passes a view over a null-terminated buffer.
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(AclInitAdapter::GetInstance().AclInit(cfg_file.data())) {
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success.";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  }
}
|
||||
|
||||
// Finalizes ACL through the shared AclInitAdapter and logs the outcome.
// ACL_ERROR_REPEAT_FINALIZE is accepted as success, like ACL_ERROR_NONE.
AclEnvGuard::~AclEnvGuard() {
  errno_ = AclInitAdapter::GetInstance().AclFinalize();
  if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
    MS_LOG(ERROR) << "Finalize acl failed.";
    // Stop here so the success message below is not emitted right after the failure message.
    return;
  }
  MS_LOG(INFO) << "Acl finalize success.";
}
|
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
|
||||
std::shared_ptr<AclEnvGuard> acl_env;
|
||||
|
||||
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
|
||||
acl_env = global_acl_env_;
|
||||
if (acl_env != nullptr) {
|
||||
MS_LOG(INFO) << "Acl has been initialized, skip.";
|
||||
} else {
|
||||
acl_env = std::make_shared<AclEnvGuard>();
|
||||
aclError ret = acl_env->GetErrno();
|
||||
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed.";
|
||||
return nullptr;
|
||||
}
|
||||
global_acl_env_ = acl_env;
|
||||
MS_LOG(INFO) << "Acl init success.";
|
||||
}
|
||||
return acl_env;
|
||||
}
|
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
|
||||
std::shared_ptr<AclEnvGuard> acl_env;
|
||||
|
@ -48,11 +115,11 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
|
|||
acl_env = std::make_shared<AclEnvGuard>(cfg_file);
|
||||
aclError ret = acl_env->GetErrno();
|
||||
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed.";
|
||||
return nullptr;
|
||||
}
|
||||
global_acl_env_ = acl_env;
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
MS_LOG(INFO) << "Acl init success.";
|
||||
}
|
||||
return acl_env;
|
||||
}
|
||||
|
|
|
@ -23,11 +23,28 @@
|
|||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
class AclInitAdapter {
|
||||
public:
|
||||
static AclInitAdapter &GetInstance();
|
||||
aclError AclInit(const char *config_file);
|
||||
aclError AclFinalize();
|
||||
aclError ForceFinalize();
|
||||
|
||||
private:
|
||||
AclInitAdapter() : init_flag_(false) {}
|
||||
~AclInitAdapter() = default;
|
||||
|
||||
bool init_flag_;
|
||||
std::mutex flag_mutex_;
|
||||
};
|
||||
|
||||
class AclEnvGuard {
|
||||
public:
|
||||
AclEnvGuard();
|
||||
explicit AclEnvGuard(std::string_view cfg_file);
|
||||
~AclEnvGuard();
|
||||
aclError GetErrno() const { return errno_; }
|
||||
static std::shared_ptr<AclEnvGuard> GetAclEnv();
|
||||
static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file);
|
||||
|
||||
private:
|
||||
|
|
|
@ -23,16 +23,83 @@ namespace acl {
|
|||
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
|
||||
std::mutex AclEnvGuard::global_acl_env_mutex_;
|
||||
|
||||
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(ACL_ERROR_NONE) {
|
||||
errno_ = aclInit(cfg_file.data());
|
||||
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
AclInitAdapter &AclInitAdapter::GetInstance() {
|
||||
static AclInitAdapter instance = {};
|
||||
return instance;
|
||||
}
|
||||
|
||||
AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); }
|
||||
// Initializes ACL once per adapter.
//
// config_file is passed straight through to aclInit().
// Returns ACL_ERROR_NONE without calling aclInit() when the adapter is already marked initialized;
// otherwise returns the status reported by aclInit().
aclError AclInitAdapter::AclInit(const char *config_file) {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  if (init_flag_) {
    return ACL_ERROR_NONE;
  }

  const aclError ret = aclInit(config_file);
  // Mark the adapter initialized only when ACL is really available. If the flag were set before the
  // result is known, a failed aclInit() would make all later calls report ACL_ERROR_NONE.
  // ACL_ERROR_REPEAT_INITIALIZE still sets the flag, as the unconditional assignment did before.
  if (ret == ACL_ERROR_NONE || ret == ACL_ERROR_REPEAT_INITIALIZE) {
    init_flag_ = true;
  }
  return ret;
}
|
||||
|
||||
// Finalizes ACL unless this adapter's flag shows there is nothing to finalize.
// Returns ACL_ERROR_NONE without calling aclFinalize() when init_flag_ is false; otherwise clears the
// flag and returns the aclFinalize() status. flag_mutex_ is held for the whole call.
aclError AclInitAdapter::AclFinalize() {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  if (!init_flag_) {
    MS_LOG(INFO) << "Had been acl finalize.";
    return ACL_ERROR_NONE;
  }

  MS_LOG(INFO) << "Begin acl finalize.";
  // Cleared up front, so a second AclFinalize() is a no-op whatever aclFinalize() returns.
  init_flag_ = false;
  return aclFinalize();
}
|
||||
|
||||
// Calls aclFinalize() unconditionally, without checking init_flag_.
// The flag is reset to false first; the aclFinalize() status is returned unchanged.
aclError AclInitAdapter::ForceFinalize() {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  MS_LOG(INFO) << "Begin force acl finalize.";
  init_flag_ = false;
  return aclFinalize();
}
|
||||
|
||||
// Brings ACL up via the shared AclInitAdapter (no config file) and keeps the status in errno_.
// The constructor only logs; use GetErrno() to check whether initialization succeeded.
AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) {
  const bool failed = (errno_ != ACL_ERROR_NONE) && (errno_ != ACL_ERROR_REPEAT_INITIALIZE);
  if (failed) {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  } else {
    MS_LOG(INFO) << "Acl init success.";
  }
}
|
||||
|
||||
// Brings ACL up via the shared AclInitAdapter with cfg_file as the config path and keeps the status in
// errno_. The constructor only logs; use GetErrno() to check the result.
// NOTE(review): std::string_view::data() need not be null-terminated, but AclInit expects a C string -
// confirm callers only pass views backed by null-terminated storage.
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(AclInitAdapter::GetInstance().AclInit(cfg_file.data())) {
  const bool failed = (errno_ != ACL_ERROR_NONE) && (errno_ != ACL_ERROR_REPEAT_INITIALIZE);
  if (failed) {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  } else {
    MS_LOG(INFO) << "Acl init success.";
  }
}
|
||||
|
||||
// Finalizes ACL via the shared AclInitAdapter and logs whether it worked.
// ACL_ERROR_REPEAT_FINALIZE counts as success alongside ACL_ERROR_NONE.
AclEnvGuard::~AclEnvGuard() {
  errno_ = AclInitAdapter::GetInstance().AclFinalize();
  if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
    MS_LOG(ERROR) << "Finalize acl failed.";
    // Return early; otherwise the success message would follow the failure message.
    return;
  }
  MS_LOG(INFO) << "Acl finalize success.";
}
|
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
|
||||
std::shared_ptr<AclEnvGuard> acl_env;
|
||||
|
||||
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
|
||||
acl_env = global_acl_env_;
|
||||
if (acl_env != nullptr) {
|
||||
MS_LOG(INFO) << "Acl has been initialized, skip.";
|
||||
} else {
|
||||
acl_env = std::make_shared<AclEnvGuard>();
|
||||
aclError ret = acl_env->GetErrno();
|
||||
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed.";
|
||||
return nullptr;
|
||||
}
|
||||
global_acl_env_ = acl_env;
|
||||
MS_LOG(INFO) << "Acl init success.";
|
||||
}
|
||||
return acl_env;
|
||||
}
|
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
|
||||
std::shared_ptr<AclEnvGuard> acl_env;
|
||||
|
@ -48,11 +115,11 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
|
|||
acl_env = std::make_shared<AclEnvGuard>(cfg_file);
|
||||
aclError ret = acl_env->GetErrno();
|
||||
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed.";
|
||||
return nullptr;
|
||||
}
|
||||
global_acl_env_ = acl_env;
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
MS_LOG(INFO) << "Acl init success.";
|
||||
}
|
||||
return acl_env;
|
||||
}
|
||||
|
|
|
@ -23,11 +23,28 @@
|
|||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
class AclInitAdapter {
|
||||
public:
|
||||
static AclInitAdapter &GetInstance();
|
||||
aclError AclInit(const char *config_file);
|
||||
aclError AclFinalize();
|
||||
aclError ForceFinalize();
|
||||
|
||||
private:
|
||||
AclInitAdapter() : init_flag_(false) {}
|
||||
~AclInitAdapter() = default;
|
||||
|
||||
bool init_flag_;
|
||||
std::mutex flag_mutex_;
|
||||
};
|
||||
|
||||
class AclEnvGuard {
|
||||
public:
|
||||
AclEnvGuard();
|
||||
explicit AclEnvGuard(std::string_view cfg_file);
|
||||
~AclEnvGuard();
|
||||
aclError GetErrno() const { return errno_; }
|
||||
static std::shared_ptr<AclEnvGuard> GetAclEnv();
|
||||
static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file);
|
||||
|
||||
private:
|
||||
|
|
|
@ -326,7 +326,7 @@ class FileWriter:
|
|||
# check the status of worker process
|
||||
for i in range(len(self._paths)):
|
||||
if not self._workers[i].is_alive():
|
||||
raise RuntimeError("Worker process(pid:{}) has stopped. Please check " \
|
||||
raise RuntimeError("Worker process(pid:{}) has stopped abnormal. Please check " \
|
||||
"the above log".format(self._workers[i].pid))
|
||||
continue
|
||||
return SUCCESS
|
||||
|
|
|
@ -33,7 +33,7 @@ class ImageNetToMR:
|
|||
Args:
|
||||
map_file (str): The map file that indicates label. This file can be generated by command
|
||||
:code:`ls -l [image_dir] | grep -vE "total|\." | awk -F " " '{print $9, NR-1;}' > [file_path]` ,
|
||||
where `image_dir` is image directory contains n02119789, n02100735, n02110185 and n02096294 directory
|
||||
where `image_dir` is image directory contains n01440764, n01443537, n01484850 and n15075141 directory
|
||||
and `file_path` is the generated `map_file` . An example of `map_file` is as below:
|
||||
|
||||
.. code-block::
|
||||
|
@ -45,7 +45,7 @@ class ImageNetToMR:
|
|||
...
|
||||
n15075141 999
|
||||
|
||||
image_dir (str): Image directory contains n02119789, n02100735, n02110185 and n02096294 directory.
|
||||
image_dir (str): Image directory contains n01440764, n01443537, n01484850 and n15075141 directory.
|
||||
destination (str): MindRecord file path to transform into, ensure that the directory is created in advance and
|
||||
no file with the same name exists in the directory.
|
||||
partition_number (int, optional): The partition size. Default: 1.
|
||||
|
|
Loading…
Reference in New Issue