forked from mindspore-Ecosystem/mindspore

Enable mindRT on CPU device

parent 25ee89cf30
commit 36b1ff25b4
@@ -26,6 +26,7 @@
 #include "utils/summary/event_writer.h"
 #include "utils/config_manager.h"
 #include "utils/mpi/mpi_config.h"
+#include "utils/ms_utils.h"
 #include "frontend/parallel/context.h"
 #include "frontend/parallel/costmodel_context.h"
 #ifdef ENABLE_GPU_COLLECTIVE
@@ -57,6 +58,9 @@ using PSContext = mindspore::ps::PSContext;
 
 // Interface with python
 PYBIND11_MODULE(_c_expression, m) {
+  // The OMP_NUM_THREADS has no effect when set in backend, so set it here in advance.
+  mindspore::common::SetOMPThreadNum();
+
   m.doc() = "MindSpore c plugin";
 
   auto fns = mindspore::PybindDefineRegister::AllFuncs();
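The comment in the hunk above is the key constraint: OpenMP reads OMP_NUM_THREADS only once, when its runtime first initializes, so the variable has to be exported at module-load time, before any backend code enters an OpenMP region. A minimal standalone sketch of that behavior (illustrative only, not MindSpore code; assumes a POSIX environment and an OpenMP-enabled compiler):

// sketch.cc - build with: g++ -fopenmp sketch.cc
// Shows that OMP_NUM_THREADS is honored only if set before the OpenMP
// runtime is first used, which is why the patch sets it at Python-module
// load time rather than in the backend.
#include <cstdio>
#include <cstdlib>
#include <omp.h>

int main() {
  // Before first use: this value is picked up. overwrite=0 keeps any
  // value the user already exported, mirroring the patch's SetEnv call.
  setenv("OMP_NUM_THREADS", "4", 0);
  std::printf("max threads: %d\n", omp_get_max_threads());

  // After the runtime has initialized, a new value has no effect.
  setenv("OMP_NUM_THREADS", "8", 1);
  std::printf("still: %d\n", omp_get_max_threads());
  return 0;
}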
@@ -567,9 +567,14 @@ void SetMindRTEnable() {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   std::string target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (target != kGPUDevice) {
+  if ((target != kGPUDevice) && (target != kCPUDevice)) {
     return;
   }
+
+#if defined(_WIN32) || defined(_WIN64)
+  return;
+#endif
+
 #if (ENABLE_CPU && !_WIN32)
   if (ps::PSContext::instance()->is_ps_mode()) {
     return;
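Taken together, the guards in SetMindRTEnable now read as: enable mindRT only for GPU or CPU targets, never on Windows builds, and not under parameter-server mode. A condensed sketch of that predicate (the function name and bool parameter are illustrative, not part of the patch, and it flattens the ENABLE_CPU conditional compilation):

#include <string>

// Condensed restatement of the guards above: mindRT is enabled only for
// GPU/CPU targets, is unavailable on Windows, and is skipped when
// parameter-server (PS) training mode is active.
bool MindRTSupported(const std::string &target, bool is_ps_mode) {
#if defined(_WIN32) || defined(_WIN64)
  return false;
#else
  if (target != "GPU" && target != "CPU") {
    return false;  // e.g. Ascend keeps the existing runtime
  }
  return !is_ps_mode;
#endif
}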
@@ -21,6 +21,7 @@
 #include <string>
 #include <vector>
 #include <atomic>
+#include <thread>
 
 #define DISABLE_COPY_AND_ASSIGN(ClassType) \
   ClassType(const ClassType &) = delete;   \
@@ -48,6 +49,19 @@ static inline int SetEnv(const char *envname, const char *envvar, int overwrite
   return ::setenv(envname, envvar, overwrite);
 #endif
 }
 
+static inline void SetOMPThreadNum() {
+  size_t cpu_core_num = std::thread::hardware_concurrency();
+  size_t cpu_core_num_half = cpu_core_num / 2;
+  const size_t kOMPThreadMaxNum = 16;
+  const size_t kOMPThreadMinNum = 1;
+
+  size_t OMP_thread_num = cpu_core_num_half < kOMPThreadMinNum ? kOMPThreadMinNum : cpu_core_num_half;
+  OMP_thread_num = OMP_thread_num > kOMPThreadMaxNum ? kOMPThreadMaxNum : OMP_thread_num;
+
+  std::string OMP_env = std::to_string(OMP_thread_num);
+  SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0);
+}
 } // namespace common
 } // namespace mindspore
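The heuristic is half the reported hardware concurrency, clamped to [1, 16]; because SetEnv is called with overwrite = 0, it never replaces an OMP_NUM_THREADS the user exported themselves, and the lower bound of 1 also covers platforms where hardware_concurrency() returns 0. A small runnable check of the same arithmetic (standalone sketch, not the patch itself):

// clamp_check.cc - verifies the clamping used by SetOMPThreadNum():
// half the core count, bounded below by 1 and above by 16.
#include <algorithm>
#include <cstddef>
#include <cstdio>

size_t OmpThreadNum(size_t cpu_core_num) {
  const size_t kOMPThreadMaxNum = 16;
  const size_t kOMPThreadMinNum = 1;
  return std::min(std::max(cpu_core_num / 2, kOMPThreadMinNum), kOMPThreadMaxNum);
}

int main() {
  // 1 core -> 1 thread, 8 cores -> 4 threads, 64 cores -> capped at 16.
  const size_t samples[] = {1, 8, 64};
  for (size_t cores : samples) {
    std::printf("%zu cores -> %zu OMP threads\n", cores, OmpThreadNum(cores));
  }
  return 0;
}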
@@ -87,10 +87,11 @@ def run_e2e_dump():
     add = Net()
     add(Tensor(x), Tensor(y))
     time.sleep(5)
-    assert len(os.listdir(dump_file_path)) == 5
     if context.get_context("device_target") == "Ascend":
+        assert len(os.listdir(dump_file_path)) == 5
         output_name = "Add.Add-op1.0.0.*.output.0.DefaultFormat.npy"
     else:
+        assert len(os.listdir(dump_file_path)) == 3
         output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy"
     output_path = glob.glob(dump_file_path + output_name)[0]
     real_path = os.path.realpath(output_path)
@@ -116,6 +117,13 @@ def test_cpu_e2e_dump():
     context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
     run_e2e_dump()
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_gpu_e2e_dump():
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    run_e2e_dump()
+
 class ReluReduceMeanDenseRelu(Cell):
     def __init__(self, kernel, bias, in_channel, num_class):
         super().__init__()