fix: aclFinalize repeat on lite

This commit is contained in:
jonyguo 2023-03-02 19:16:49 +08:00
parent 98143eb165
commit 503099e05e
11 changed files with 214 additions and 39 deletions

View File

@ -15,7 +15,7 @@
... ...
n15075141 999 n15075141 999
- **image_dir** (str) - ImageNet数据集的目录路径，目录中包含类似n02119789、n02100735、n02110185和n02096294的子目录。 - **image_dir** (str) - ImageNet数据集的目录路径，目录中包含类似n01440764、n01443537、n01484850和n15075141的子目录。
- **destination** (str) - 转换生成的MindRecord文件路径，需提前创建目录并且目录下不能存在同名文件。 - **destination** (str) - 转换生成的MindRecord文件路径，需提前创建目录并且目录下不能存在同名文件。
- **partition_number** (int，可选) - 生成MindRecord的文件个数。默认值：1。 - **partition_number** (int，可选) - 生成MindRecord的文件个数。默认值：1。

View File

@ -39,33 +39,36 @@ aclError AclInitAdapter::AclInit(const char *config_file) {
aclError AclInitAdapter::AclFinalize() { aclError AclInitAdapter::AclFinalize() {
std::lock_guard<std::mutex> lock(flag_mutex_); std::lock_guard<std::mutex> lock(flag_mutex_);
if (!init_flag_) { if (!init_flag_) {
MS_LOG(INFO) << "Had been acl finalize.";
return ACL_ERROR_NONE; return ACL_ERROR_NONE;
} }
MS_LOG(INFO) << "Begine acl finalize.";
init_flag_ = false; init_flag_ = false;
return aclFinalize(); return aclFinalize();
} }
aclError AclInitAdapter::ForceFinalize() { aclError AclInitAdapter::ForceFinalize() {
std::lock_guard<std::mutex> lock(flag_mutex_); std::lock_guard<std::mutex> lock(flag_mutex_);
MS_LOG(INFO) << "Begine force acl finalize.";
init_flag_ = false; init_flag_ = false;
return aclFinalize(); return aclFinalize();
} }
AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) { AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) {
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) { if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed"; MS_LOG(ERROR) << "Execute aclInit Failed.";
return; return;
} }
MS_LOG(INFO) << "Acl init success"; MS_LOG(INFO) << "Acl init success.";
} }
AclEnvGuard::~AclEnvGuard() { AclEnvGuard::~AclEnvGuard() {
errno_ = AclInitAdapter::GetInstance().AclFinalize(); errno_ = AclInitAdapter::GetInstance().AclFinalize();
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) { if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
MS_LOG(ERROR) << "Finalize acl failed"; MS_LOG(ERROR) << "Finalize acl failed.";
} }
MS_LOG(INFO) << "Acl finalize success"; MS_LOG(INFO) << "Acl finalize success.";
} }
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() { std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
@ -77,11 +80,11 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
acl_env = std::make_shared<AclEnvGuard>(); acl_env = std::make_shared<AclEnvGuard>();
aclError ret = acl_env->GetErrno(); aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) { if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed"; MS_LOG(ERROR) << "Execute aclInit Failed.";
return nullptr; return nullptr;
} }
global_acl_env_ = acl_env; global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success"; MS_LOG(INFO) << "Acl init success.";
} }
return acl_env; return acl_env;
} }

View File

@ -17,11 +17,6 @@ if(NOT MSLITE_ENABLE_ACL)
${DVPP_UTILS_SRC} ${DVPP_UTILS_SRC}
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc
) )
else()
set(DVPP_UTILS_SRC
${DVPP_UTILS_SRC}
${CMAKE_SOURCE_DIR}/../../mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.cc
)
endif() endif()
add_library(dvpp_utils SHARED ${DVPP_UTILS_SRC}) add_library(dvpp_utils SHARED ${DVPP_UTILS_SRC})
@ -30,7 +25,8 @@ enable_target_when_only_build_plugins(dvpp_utils)
if(MSLITE_ENABLE_ACL) if(MSLITE_ENABLE_ACL)
find_library(acl_dvpp libacl_dvpp.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) find_library(acl_dvpp libacl_dvpp.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(acl libascendcl.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) find_library(acl libascendcl.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(dvpp_utils PRIVATE minddata-lite ${acl} ${acl_dvpp} mindspore_core) # find acl_env_guard in ascend_kernel_plugin
target_link_libraries(dvpp_utils PRIVATE ascend_kernel_plugin minddata-lite ${acl} ${acl_dvpp} mindspore_core)
else() else()
target_link_libraries(dvpp_utils PRIVATE _c_dataengine ${ACL} ${ACL_DVPP} mindspore_core mindspore_shared_lib) target_link_libraries(dvpp_utils PRIVATE _c_dataengine ${ACL} ${ACL_DVPP} mindspore_core mindspore_shared_lib)
endif() endif()

View File

@ -66,7 +66,7 @@ void ResourceManager::Release() {
} }
// finalize the acl when the process exit // finalize the acl when the process exit
ret = mindspore::AclInitAdapter::GetInstance().AclFinalize(); ret = AclInitAdapter::GetInstance().AclFinalize();
if (ret != APP_ERR_OK) { if (ret != APP_ERR_OK) {
MS_LOG(DEBUG) << "Failed to finalize acl, ret = " << ret << "."; MS_LOG(DEBUG) << "Failed to finalize acl, ret = " << ret << ".";
} }
@ -96,7 +96,7 @@ APP_ERROR ResourceManager::InitResource(ResourceInfo &resourceInfo) {
return APP_ERR_OK; return APP_ERR_OK;
} }
APP_ERROR ret = APP_ERR_OK; APP_ERROR ret = APP_ERR_OK;
acl_env_ = mindspore::AclEnvGuard::GetAclEnv(); acl_env_ = AclEnvGuard::GetAclEnv();
if (acl_env_ == nullptr) { if (acl_env_ == nullptr) {
MS_LOG(ERROR) << "Failed to init acl."; MS_LOG(ERROR) << "Failed to init acl.";
return APP_ERR_COMM_FAILURE; return APP_ERR_COMM_FAILURE;

View File

@ -22,7 +22,15 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#ifndef BUILD_LITE
#include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h" #include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h"
using AclEnvGuard = mindspore::AclEnvGuard;
using AclInitAdapter = mindspore::AclInitAdapter;
#else
#include "mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.h"
using AclEnvGuard = mindspore::kernel::acl::AclEnvGuard;
using AclInitAdapter = mindspore::kernel::acl::AclInitAdapter;
#endif
#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h" #include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h" #include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
#include "minddata/dataset/kernels/image/dvpp/utils/resouce_info.h" #include "minddata/dataset/kernels/image/dvpp/utils/resouce_info.h"
@ -57,7 +65,7 @@ class ResourceManager {
std::vector<int> deviceIds_; std::vector<int> deviceIds_;
std::vector<aclrtContext> contexts_; std::vector<aclrtContext> contexts_;
std::unordered_map<int, int> deviceIdMap_; // Map of device to index std::unordered_map<int, int> deviceIdMap_; // Map of device to index
std::shared_ptr<mindspore::AclEnvGuard> acl_env_; std::shared_ptr<AclEnvGuard> acl_env_;
}; };
#endif #endif

View File

@ -23,16 +23,83 @@ namespace acl {
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr; std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
std::mutex AclEnvGuard::global_acl_env_mutex_; std::mutex AclEnvGuard::global_acl_env_mutex_;
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) { AclInitAdapter &AclInitAdapter::GetInstance() {
errno_ = aclInit(cfg_file.data()); static AclInitAdapter instance = {};
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) { return instance;
MS_LOG(ERROR) << "Execute aclInit Failed";
return;
}
MS_LOG(INFO) << "Acl init success";
} }
AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); } aclError AclInitAdapter::AclInit(const char *config_file) {
std::lock_guard<std::mutex> lock(flag_mutex_);
if (init_flag_) {
return ACL_ERROR_NONE;
}
init_flag_ = true;
return aclInit(config_file);
}
// Calls aclFinalize() only when this adapter currently records an initialized
// state; otherwise reports ACL_ERROR_NONE without touching ACL. The flag check
// and update happen while flag_mutex_ is held.
aclError AclInitAdapter::AclFinalize() {
  std::lock_guard<std::mutex> guard(flag_mutex_);
  if (init_flag_) {
    MS_LOG(INFO) << "Begin acl finalize.";
    // Clear the flag before finalizing so a later call takes the skip path.
    init_flag_ = false;
    return aclFinalize();
  }
  MS_LOG(INFO) << "Had been acl finalize.";
  return ACL_ERROR_NONE;
}
// Unconditionally clears the init flag and calls aclFinalize(), regardless of
// what the flag recorded beforehand. Runs under flag_mutex_.
aclError AclInitAdapter::ForceFinalize() {
  std::lock_guard<std::mutex> guard(flag_mutex_);
  MS_LOG(INFO) << "Begin force acl finalize.";
  init_flag_ = false;
  const aclError finalize_status = aclFinalize();
  return finalize_status;
}
// Initializes ACL through the shared adapter with no config file (nullptr) and
// stores the returned status in errno_; callers read it via GetErrno().
AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) {
  // A repeated-initialize status is treated the same as success.
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success.";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  }
}
// Initializes ACL through the shared adapter using the given config file path
// and stores the returned status in errno_.
// NOTE(review): cfg_file.data() is handed to a `const char *` parameter;
// std::string_view does not guarantee NUL termination, so this presumably
// relies on callers passing a view over a NUL-terminated string - confirm.
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(AclInitAdapter::GetInstance().AclInit(cfg_file.data())) {
  // A repeated-initialize status is treated the same as success.
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success.";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  }
}
// Finalizes ACL through the shared adapter and records the status in errno_.
// ACL_ERROR_REPEAT_FINALIZE is tolerated alongside ACL_ERROR_NONE.
AclEnvGuard::~AclEnvGuard() {
  errno_ = AclInitAdapter::GetInstance().AclFinalize();
  if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
    MS_LOG(ERROR) << "Finalize acl failed.";
    // Stop here: the success message below must not follow a failure.
    return;
  }
  MS_LOG(INFO) << "Acl finalize success.";
}
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
std::shared_ptr<AclEnvGuard> acl_env;
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
acl_env = global_acl_env_;
if (acl_env != nullptr) {
MS_LOG(INFO) << "Acl has been initialized, skip.";
} else {
acl_env = std::make_shared<AclEnvGuard>();
aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed.";
return nullptr;
}
global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success.";
}
return acl_env;
}
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) { std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
std::shared_ptr<AclEnvGuard> acl_env; std::shared_ptr<AclEnvGuard> acl_env;
@ -48,11 +115,11 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
acl_env = std::make_shared<AclEnvGuard>(cfg_file); acl_env = std::make_shared<AclEnvGuard>(cfg_file);
aclError ret = acl_env->GetErrno(); aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) { if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed"; MS_LOG(ERROR) << "Execute aclInit Failed.";
return nullptr; return nullptr;
} }
global_acl_env_ = acl_env; global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success"; MS_LOG(INFO) << "Acl init success.";
} }
return acl_env; return acl_env;
} }

View File

@ -23,11 +23,28 @@
namespace mindspore::kernel { namespace mindspore::kernel {
namespace acl { namespace acl {
// Singleton wrapper around ACL initialization and finalization. Instances are
// obtained only through GetInstance(); the constructor and destructor are
// private.
class AclInitAdapter {
 public:
  // Accessor for the single adapter instance.
  static AclInitAdapter &GetInstance();
  // Initializes ACL with the given config file path (may be nullptr).
  aclError AclInit(const char *config_file);
  // Finalizes ACL; see the implementation for when the call is skipped.
  aclError AclFinalize();
  // Finalizes ACL without consulting the recorded init state.
  aclError ForceFinalize();

 private:
  AclInitAdapter() = default;
  ~AclInitAdapter() = default;

  bool init_flag_ = false;  // starts out as "not initialized"
  std::mutex flag_mutex_;
};
class AclEnvGuard { class AclEnvGuard {
public: public:
AclEnvGuard();
explicit AclEnvGuard(std::string_view cfg_file); explicit AclEnvGuard(std::string_view cfg_file);
~AclEnvGuard(); ~AclEnvGuard();
aclError GetErrno() const { return errno_; } aclError GetErrno() const { return errno_; }
static std::shared_ptr<AclEnvGuard> GetAclEnv();
static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file); static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file);
private: private:

View File

@ -23,16 +23,83 @@ namespace acl {
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr; std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
std::mutex AclEnvGuard::global_acl_env_mutex_; std::mutex AclEnvGuard::global_acl_env_mutex_;
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(ACL_ERROR_NONE) { AclInitAdapter &AclInitAdapter::GetInstance() {
errno_ = aclInit(cfg_file.data()); static AclInitAdapter instance = {};
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) { return instance;
MS_LOG(ERROR) << "Execute aclInit Failed";
return;
}
MS_LOG(INFO) << "Acl init success";
} }
AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); } aclError AclInitAdapter::AclInit(const char *config_file) {
std::lock_guard<std::mutex> lock(flag_mutex_);
if (init_flag_) {
return ACL_ERROR_NONE;
}
init_flag_ = true;
return aclInit(config_file);
}
// Calls aclFinalize() only when this adapter currently records an initialized
// state; otherwise reports ACL_ERROR_NONE without touching ACL. The flag check
// and update happen while flag_mutex_ is held.
aclError AclInitAdapter::AclFinalize() {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  if (!init_flag_) {
    MS_LOG(INFO) << "Had been acl finalize.";
    return ACL_ERROR_NONE;
  }
  // Message spelling corrected ("Begine" -> "Begin") so log searches match.
  MS_LOG(INFO) << "Begin acl finalize.";
  // Clear the flag before finalizing so a later call takes the skip path.
  init_flag_ = false;
  return aclFinalize();
}
// Unconditionally clears the init flag and calls aclFinalize(), regardless of
// what the flag recorded beforehand. Runs under flag_mutex_.
aclError AclInitAdapter::ForceFinalize() {
  std::lock_guard<std::mutex> lock(flag_mutex_);
  // Message spelling corrected ("Begine" -> "Begin") so log searches match.
  MS_LOG(INFO) << "Begin force acl finalize.";
  init_flag_ = false;
  return aclFinalize();
}
// Initializes ACL through the shared adapter with no config file (nullptr) and
// stores the returned status in errno_; callers read it via GetErrno().
AclEnvGuard::AclEnvGuard() : errno_(AclInitAdapter::GetInstance().AclInit(nullptr)) {
  // A repeated-initialize status is treated the same as success.
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success.";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  }
}
// Initializes ACL through the shared adapter using the given config file path
// and stores the returned status in errno_.
// NOTE(review): cfg_file.data() is handed to a `const char *` parameter;
// std::string_view does not guarantee NUL termination, so this presumably
// relies on callers passing a view over a NUL-terminated string - confirm.
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(AclInitAdapter::GetInstance().AclInit(cfg_file.data())) {
  // A repeated-initialize status is treated the same as success.
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success.";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed.";
  }
}
// Finalizes ACL through the shared adapter and records the status in errno_.
// ACL_ERROR_REPEAT_FINALIZE is tolerated alongside ACL_ERROR_NONE.
AclEnvGuard::~AclEnvGuard() {
  errno_ = AclInitAdapter::GetInstance().AclFinalize();
  if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
    MS_LOG(ERROR) << "Finalize acl failed.";
    // Stop here: the success message below must not follow a failure.
    return;
  }
  MS_LOG(INFO) << "Acl finalize success.";
}
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
std::shared_ptr<AclEnvGuard> acl_env;
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
acl_env = global_acl_env_;
if (acl_env != nullptr) {
MS_LOG(INFO) << "Acl has been initialized, skip.";
} else {
acl_env = std::make_shared<AclEnvGuard>();
aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed.";
return nullptr;
}
global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success.";
}
return acl_env;
}
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) { std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
std::shared_ptr<AclEnvGuard> acl_env; std::shared_ptr<AclEnvGuard> acl_env;
@ -48,11 +115,11 @@ std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
acl_env = std::make_shared<AclEnvGuard>(cfg_file); acl_env = std::make_shared<AclEnvGuard>(cfg_file);
aclError ret = acl_env->GetErrno(); aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) { if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed"; MS_LOG(ERROR) << "Execute aclInit Failed.";
return nullptr; return nullptr;
} }
global_acl_env_ = acl_env; global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success"; MS_LOG(INFO) << "Acl init success.";
} }
return acl_env; return acl_env;
} }

View File

@ -23,11 +23,28 @@
namespace mindspore::kernel { namespace mindspore::kernel {
namespace acl { namespace acl {
// Singleton wrapper around ACL initialization and finalization. Instances are
// obtained only through GetInstance(); the constructor and destructor are
// private.
class AclInitAdapter {
 public:
  // Accessor for the single adapter instance.
  static AclInitAdapter &GetInstance();
  // Initializes ACL with the given config file path (may be nullptr).
  aclError AclInit(const char *config_file);
  // Finalizes ACL; see the implementation for when the call is skipped.
  aclError AclFinalize();
  // Finalizes ACL without consulting the recorded init state.
  aclError ForceFinalize();

 private:
  AclInitAdapter() = default;
  ~AclInitAdapter() = default;

  bool init_flag_ = false;  // starts out as "not initialized"
  std::mutex flag_mutex_;
};
class AclEnvGuard { class AclEnvGuard {
public: public:
AclEnvGuard();
explicit AclEnvGuard(std::string_view cfg_file); explicit AclEnvGuard(std::string_view cfg_file);
~AclEnvGuard(); ~AclEnvGuard();
aclError GetErrno() const { return errno_; } aclError GetErrno() const { return errno_; }
static std::shared_ptr<AclEnvGuard> GetAclEnv();
static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file); static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file);
private: private:

View File

@ -326,7 +326,7 @@ class FileWriter:
# check the status of worker process # check the status of worker process
for i in range(len(self._paths)): for i in range(len(self._paths)):
if not self._workers[i].is_alive(): if not self._workers[i].is_alive():
raise RuntimeError("Worker process(pid:{}) has stopped. Please check " \ raise RuntimeError("Worker process(pid:{}) has stopped abnormal. Please check " \
"the above log".format(self._workers[i].pid)) "the above log".format(self._workers[i].pid))
continue continue
return SUCCESS return SUCCESS

View File

@ -33,7 +33,7 @@ class ImageNetToMR:
Args: Args:
map_file (str): The map file that indicates label. This file can be generated by command map_file (str): The map file that indicates label. This file can be generated by command
:code:`ls -l [image_dir] | grep -vE "total|\." | awk -F " " '{print $9, NR-1;}' > [file_path]` , :code:`ls -l [image_dir] | grep -vE "total|\." | awk -F " " '{print $9, NR-1;}' > [file_path]` ,
where `image_dir` is image directory contains n02119789, n02100735, n02110185 and n02096294 directory where `image_dir` is image directory contains n01440764, n01443537, n01484850 and n15075141 directory
and `file_path` is the generated `map_file` . An example of `map_file` is as below: and `file_path` is the generated `map_file` . An example of `map_file` is as below:
.. code-block:: .. code-block::
@ -45,7 +45,7 @@ class ImageNetToMR:
... ...
n15075141 999 n15075141 999
image_dir (str): Image directory contains n02119789, n02100735, n02110185 and n02096294 directory. image_dir (str): Image directory contains n01440764, n01443537, n01484850 and n15075141 directory.
destination (str): MindRecord file path to transform into, ensure that the directory is created in advance and destination (str): MindRecord file path to transform into, ensure that the directory is created in advance and
no file with the same name exists in the directory. no file with the same name exists in the directory.
partition_number (int, optional): The partition size. Default: 1. partition_number (int, optional): The partition size. Default: 1.