forked from mindspore-Ecosystem/mindspore
commit f3b70ff9db

@@ -980,7 +980,7 @@ Contributions of any kind are welcome!

 #### DataSet

-- [STABLE] If the libnuma library is installed in the environment, you can run `export DATASET_ENABLE_NUMA=True` to configure NUMA binding. In multi-card training scenarios, the training data processing speed can be improved, thereby improving the network training efficiency.
+- [STABLE] If the libnuma library is installed in the environment, you can run `export DATASET_ENABLE_NUMA=True` or `export MS_ENABLE_NUMA=True` to configure NUMA binding. In multi-card training scenarios, the training data processing speed can be improved, thereby improving the network training efficiency.
 - [STABLE] Unify API Tensor structure of Training/Inference interfaces in C++ SDK.
 - [STABLE] Optimize duplicated Decode in data preprocess using cache, improve preprocess efficiency.
 - [STABLE] Support eager mode to run data augmentation in Python & C++.
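
Note: the first bullet above is the user-facing switch for the scheduler change later in this diff. As a quick illustration (not part of the commit), here is a minimal, self-contained sketch of reading those environment switches on the C++ side; the commit itself goes through MindSpore's `common::GetEnv`, and note that the Python dataset layer checks for `True` while the new graph-scheduler check accepts `1`:

```cpp
// Minimal sketch (illustrative only): reading the NUMA switches described above.
#include <cstdlib>
#include <cstring>
#include <iostream>

bool NumaBindRequested() {
  const auto enabled = [](const char *v) {
    // The dataset layer expects "True"; the graph scheduler added in this commit expects "1".
    return v != nullptr && (std::strcmp(v, "True") == 0 || std::strcmp(v, "1") == 0);
  };
  return enabled(std::getenv("DATASET_ENABLE_NUMA")) || enabled(std::getenv("MS_ENABLE_NUMA"));
}

int main() {
  std::cout << "NUMA binding requested: " << std::boolalpha << NumaBindRequested() << std::endl;
  return 0;
}
```
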
@@ -20,7 +20,7 @@
 #include "minddata/dataset/engine/datasetops/dataset_op.h"
 #include "minddata/dataset/engine/datasetops/device_queue_op.h"
 #if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
-#include "minddata/dataset/util/numa_interface.h"
+#include "mindspore/core/utils/numa_interface.h"
 #endif
 #include "minddata/dataset/util/task_manager.h"
 #include "minddata/dataset/util/service.h"

@@ -45,9 +45,7 @@ ExecutionTree::ExecutionTree() : id_count_(0), tree_state_(kDeTStateInit) {
 ExecutionTree::~ExecutionTree() {
 #if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
   if (numa_enable_) {
-    if (handle_ != nullptr) {
-      ReleaseLibrary(handle_);
-    }
+    handle_ = nullptr;
   }
 #if defined(ENABLE_TDTQUE)
   DeviceQueueOp *op = dynamic_cast<DeviceQueueOp *>(root_.get());

@@ -162,7 +160,7 @@ Status ExecutionTree::Launch() {
        RETURN_STATUS_UNEXPECTED("Numa package (libnuma.so) not found.");
      }
    }
-    RETURN_IF_NOT_OK(NumaBind(handle_, rank_id_));
+    RETURN_IF_NOT_OK(NumaBind(handle_.get(), rank_id_));
    MS_LOG(INFO) << "Numa bind memory and cpu successful.";
  }
 #endif

@@ -235,7 +235,7 @@ class ExecutionTree {
   // but for distribute scenario, this rank_id come from _get_global_rank() in python
   int32_t rank_id_;
   bool numa_enable_;
-  void *handle_;
+  std::shared_ptr<void> handle_;
 #endif
 };
 } // namespace dataset

@@ -1,6 +1,3 @@
 file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
-if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
-    LIST(REMOVE_ITEM _CURRENT_SRC_FILES numa_interface.cc)
-endif()
 add_library(utils OBJECT ${_CURRENT_SRC_FILES})

@@ -60,12 +60,19 @@
 #include "abstract/ops/primitive_infer_map.h"
 #include "mindspore/core/utils/file_utils.h"
+
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
+#include "utils/numa_interface.h"
+#endif

 namespace mindspore {
 namespace runtime {
 using distributed::cluster::ClusterContext;
 using distributed::collective::CollectiveManager;
 using distributed::recovery::RecoveryContext;
 namespace {
+constexpr char kNumaEnableEnv[] = "MS_ENABLE_NUMA";
+constexpr char kNumaEnableEnv2[] = "DATASET_ENABLE_NUMA";
+
 bool IsNeedInsertCopyActor(const DeviceContext *from_device_context, const DeviceContext *to_device_context) {
   MS_EXCEPTION_IF_NULL(from_device_context);
   MS_EXCEPTION_IF_NULL(to_device_context);

@@ -359,6 +366,7 @@ void GraphScheduler::Initialize() {
   }
   init_ = true;

+  BindNumaNode();
   (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kDeviceDataSourceActor,
                                       &GraphScheduler::LinkDataArrowForBaseActor);
   (void)kKernelTypeToLinkFunc.emplace(KernelTransformType::kHostDataSourceActor,

@@ -2234,5 +2242,30 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil
     }
   }
 }
+
+void GraphScheduler::BindNumaNode() {
+  auto numa_enable = common::GetEnv(kNumaEnableEnv);
+  auto numa_enable2 = common::GetEnv(kNumaEnableEnv2);
+  if ((numa_enable.empty() || numa_enable != "1") && (numa_enable2.empty() || numa_enable2 != "1")) {
+    return;
+  }
+
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__) && !defined(ENABLE_ANDROID)
+  uint32_t rank_id = CommManager::GetInstance().GetRank();
+  MS_LOG(INFO) << "Bind numa node for rank " << rank_id;
+  if (numa_handle_ == nullptr) {
+    numa_handle_ = GetNumaAdapterHandle();
+    if (numa_handle_ == nullptr) {
+      MS_LOG(EXCEPTION) << "Load numa library failed.";
+    }
+  }
+
+  auto ret = NumaBind(numa_handle_.get(), rank_id);
+  if (ret != StatusCode::kSuccess) {
+    MS_LOG(EXCEPTION) << "Bind numa node failed, ret = " << ret.GetErrDescription();
+  }
+  MS_LOG(INFO) << "Numa bind memory and cpu successful.";
+#endif
+}
 } // namespace runtime
 } // namespace mindspore

@@ -189,6 +189,9 @@ class BACKEND_EXPORT GraphScheduler {
   void DumpActor(const ActorSet *actor_set, const GraphCompilerInfo &graph_compiler_info) const;
   void DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const;

+  // bind thread pool to same numa node
+  void BindNumaNode();
+
   // The global maps, only be cleared in the deconstruction.
   mindspore::HashMap<ActorInfo, ActorSetPtr> actors_;

@@ -214,6 +217,8 @@ class BACKEND_EXPORT GraphScheduler {

   // Whether actor running by the persistent execution order.
   bool execution_order_running_{false};
+  // numa library handle
+  std::shared_ptr<void> numa_handle_{};

   bool init_{false};
 };
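
Note: the new `numa_handle_` member is a `std::shared_ptr<void>`, so the dlopen'd libnuma handle is closed automatically when the last owner goes away. A small standalone sketch of that ownership pattern (illustrative only; `OpenSharedLibrary` is not a function from this commit, the real code wraps this in `GetNumaAdapterHandle()`/`ReleaseLibrary`):

```cpp
// Sketch: keep a dlopen'd library alive through std::shared_ptr<void> with a
// deleter that closes the handle. Requires Linux and linking against libdl.
#include <dlfcn.h>
#include <iostream>
#include <memory>

std::shared_ptr<void> OpenSharedLibrary(const char *name) {
  void *handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr) {
    return nullptr;
  }
  // dlclose runs when the last shared_ptr copy is destroyed, which is what lets
  // GraphScheduler simply hold the member until its own destruction.
  return std::shared_ptr<void>(handle, [](void *h) { (void)dlclose(h); });
}

int main() {
  auto numa = OpenSharedLibrary("libnuma.so");
  std::cout << (numa ? "libnuma loaded" : "libnuma not found") << std::endl;
  return 0;
}
```
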
@@ -42,6 +42,15 @@ file(GLOB_RECURSE CORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}

 set(CORE_SRC_LIST ${CORE_SRC_LIST} ${CORE_OPS_LIST})

+if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    LIST(REMOVE_ITEM CORE_SRC_LIST utils/numa_interface.cc)
+endif()
+
+if(ENABLE_SECURITY)
+    file(GLOB_RECURSE _INFER_SUMMARY_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ops/*_summary.cc")
+    list(REMOVE_ITEM CORE_SRC_LIST ${_INFER_SUMMARY_FILES})
+endif()
+
 file(GLOB_RECURSE PROTO_FILE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "proto/*.proto")
 if(NOT(BUILD_LITE))
     ms_protobuf_generate_py(PROTO_SRCS PY_HDRS PY_PYS ${PROTO_FILE})

@@ -13,11 +13,28 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "minddata/dataset/util/numa_interface.h"
+#include "utils/numa_interface.h"
 #include <dlfcn.h>
+#include <memory>
+#include <mutex>
+#include "utils/log_adapter.h"
+
+#define RETURN_STATUS_UNEXPECTED(_e) \
+  do { \
+    return Status(StatusCode::kCoreFailed, __LINE__, __FILE__, _e); \
+  } while (false)

 namespace mindspore {
-namespace dataset {
+namespace {
+struct bitmask {
+  uint64_t size;
+  uint64_t *maskp;
+};
+
+std::weak_ptr<void> g_numa_lib_handle;
+std::mutex g_numa_lib_handle_mutex;
+} // namespace
+
 inline void *LoadLibrary(const char *name) {
   if (name == nullptr) {
     return nullptr;

@@ -45,9 +62,16 @@ void ReleaseLibrary(void *handle) {
   }
 }

-void *GetNumaAdapterHandle() {
+std::shared_ptr<void> GetNumaAdapterHandle() {
+  std::lock_guard<std::mutex> lock(g_numa_lib_handle_mutex);
+  auto shared = g_numa_lib_handle.lock();
+  if (shared != nullptr) {
+    return shared;
+  }
   void *handle = LoadLibrary("libnuma.so");
-  return handle;
+  shared = std::shared_ptr<void>(handle, ReleaseLibrary);
+  g_numa_lib_handle = shared;
+  return shared;
 }

 Status NumaBind(void *handle, const int32_t &rank_id) {

@@ -100,5 +124,4 @@ Status NumaBind(void *handle, const int32_t &rank_id) {
   }
   return Status::OK();
 }
-} // namespace dataset
 } // namespace mindspore
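
Note: the rewritten `GetNumaAdapterHandle()` above caches the library handle in a mutex-guarded `std::weak_ptr`, so every caller shares one handle while any of them is still alive, and the library is reopened lazily after the last copy is dropped. A self-contained sketch of that caching pattern (the `LoadResource`/`GetResource` names are stand-ins, not code from this commit):

```cpp
// Sketch: mutex-guarded weak_ptr cache that hands out a shared instance.
#include <iostream>
#include <memory>
#include <mutex>

namespace {
std::weak_ptr<int> g_cache;   // does not extend the lifetime by itself
std::mutex g_cache_mutex;

std::shared_ptr<int> LoadResource() {
  // Stand-in for LoadLibrary("libnuma.so") with ReleaseLibrary as the deleter.
  return std::make_shared<int>(42);
}
}  // namespace

std::shared_ptr<int> GetResource() {
  std::lock_guard<std::mutex> lock(g_cache_mutex);
  auto shared = g_cache.lock();
  if (shared != nullptr) {
    return shared;  // Reuse the instance another caller is still holding.
  }
  shared = LoadResource();
  g_cache = shared;
  return shared;
}

int main() {
  auto a = GetResource();
  auto b = GetResource();
  std::cout << "same instance: " << std::boolalpha << (a.get() == b.get()) << std::endl;
  return 0;
}
```
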
@@ -13,33 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_
-#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_
+#ifndef MINDSPORE_CORE_UTILS_NUMA_INTERFACE_H_
+#define MINDSPORE_CORE_UTILS_NUMA_INTERFACE_H_

-#include "minddata/dataset/util/log_adapter.h"
-#include "minddata/dataset/util/status.h"
+#include <memory>
+#include "include/api/status.h"
+#include "utils/visible.h"

 namespace mindspore {
-namespace dataset {
-struct bitmask {
-  uint64_t size;
-  uint64_t *maskp;
-};
-
-// Now we separate the link from _c_dataengine with numa,
+// Now we separate the link from mindspore binary with numa,
 // and we use dlopen("libnuma") instead. This function will
 // return a handle which you can do NumaBind and ReleaseLibrary.
-void *GetNumaAdapterHandle();
+MS_CORE_API std::shared_ptr<void> GetNumaAdapterHandle();

 // Totally this function will do:
 // 1. Get function pointer of numa api
 // 2. Do numa_bind
-Status NumaBind(void *handle, const int32_t &rank_id);
-
-// Release the numa handle for avoid memory leak, we should
-// not allow handle is nullptr before we use it.
-void ReleaseLibrary(void *handle);
-} // namespace dataset
+MS_CORE_API Status NumaBind(void *handle, const int32_t &rank_id);
 } // namespace mindspore

-#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_
+#endif // MINDSPORE_CORE_UTILS_NUMA_INTERFACE_H_
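
Note: for reference, a hedged usage sketch of the relocated API, following the same call sequence the commit adds in `GraphScheduler::BindNumaNode()`; the rank id `0` and the error handling below are illustrative, not code from this commit, and the snippet only builds inside the MindSpore tree:

```cpp
// Illustrative caller of the core numa interface declared above.
#include <iostream>
#include "utils/numa_interface.h"

int main() {
  // dlopen("libnuma.so") happens behind this call; a null handle means libnuma is absent.
  std::shared_ptr<void> handle = mindspore::GetNumaAdapterHandle();
  if (handle == nullptr) {
    std::cout << "libnuma not available, skipping NUMA binding" << std::endl;
    return 0;
  }
  // Bind this process to the NUMA node derived from the rank id (0 here as an example).
  mindspore::Status ret = mindspore::NumaBind(handle.get(), 0);
  if (ret != mindspore::StatusCode::kSuccess) {
    std::cout << "NUMA bind failed: " << ret.GetErrDescription() << std::endl;
    return 1;
  }
  std::cout << "NUMA bind succeeded" << std::endl;
  return 0;
}
```
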
@@ -65,6 +65,9 @@ def _init_device_info():
     from mindspore.parallel._utils import _get_global_rank
     numa_enable = False
     numa_enable_env = os.getenv("DATASET_ENABLE_NUMA", None)
     if numa_enable_env and numa_enable_env.strip() == 'True':
         numa_enable = True
+    numa_enable_env = os.getenv("MS_ENABLE_NUMA", None)
+    if numa_enable_env and numa_enable_env.strip() == 'True':
+        numa_enable = True
     if context.get_context("device_target") == "GPU":