forked from mindspore-Ecosystem/mindspore

Enable mindRT on CPU device

parent 25ee89cf30
commit 36b1ff25b4
@@ -26,6 +26,7 @@
 #include "utils/summary/event_writer.h"
 #include "utils/config_manager.h"
 #include "utils/mpi/mpi_config.h"
+#include "utils/ms_utils.h"
 #include "frontend/parallel/context.h"
 #include "frontend/parallel/costmodel_context.h"
 #ifdef ENABLE_GPU_COLLECTIVE
@@ -57,6 +58,9 @@ using PSContext = mindspore::ps::PSContext;
 
 // Interface with python
 PYBIND11_MODULE(_c_expression, m) {
+  // The OMP_NUM_THREADS has no effect when set in backend, so set it here in advance.
+  mindspore::common::SetOMPThreadNum();
+
   m.doc() = "MindSpore c plugin";
 
   auto fns = mindspore::PybindDefineRegister::AllFuncs();
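The comment in the hunk above is the key constraint: OpenMP reads OMP_NUM_THREADS only once, when its runtime first initializes, so the variable has to be exported at module-load time, before any backend code enters an OpenMP region. A minimal standalone sketch of that behavior (illustrative only, not MindSpore code; assumes a POSIX environment and an OpenMP-enabled compiler):

// sketch.cc - build with: g++ -fopenmp sketch.cc
// Shows that OMP_NUM_THREADS is honored only if set before the OpenMP
// runtime is first used, which is why the patch sets it at Python-module
// load time rather than in the backend.
#include <cstdio>
#include <cstdlib>
#include <omp.h>

int main() {
  // Before first use: this value is picked up. overwrite=0 keeps any
  // value the user already exported, mirroring the patch's SetEnv call.
  setenv("OMP_NUM_THREADS", "4", 0);
  std::printf("max threads: %d\n", omp_get_max_threads());

  // After the runtime has initialized, a new value has no effect.
  setenv("OMP_NUM_THREADS", "8", 1);
  std::printf("still: %d\n", omp_get_max_threads());
  return 0;
}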
@@ -567,9 +567,14 @@ void SetMindRTEnable() {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   std::string target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (target != kGPUDevice) {
+  if ((target != kGPUDevice) && (target != kCPUDevice)) {
     return;
   }
+
+#if defined(_WIN32) || defined(_WIN64)
+  return;
+#endif
+
 #if (ENABLE_CPU && !_WIN32)
   if (ps::PSContext::instance()->is_ps_mode()) {
     return;
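Taken together, the guards in SetMindRTEnable now read as: enable mindRT only for GPU or CPU targets, never on Windows builds, and not under parameter-server mode. A condensed sketch of that predicate (the function name and bool parameter are illustrative, not part of the patch, and it flattens the ENABLE_CPU conditional compilation):

#include <string>

// Condensed restatement of the guards above: mindRT is enabled only for
// GPU/CPU targets, is unavailable on Windows, and is skipped when
// parameter-server (PS) training mode is active.
bool MindRTSupported(const std::string &target, bool is_ps_mode) {
#if defined(_WIN32) || defined(_WIN64)
  return false;
#else
  if (target != "GPU" && target != "CPU") {
    return false;  // e.g. Ascend keeps the existing runtime
  }
  return !is_ps_mode;
#endif
}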
@@ -21,6 +21,7 @@
 #include <string>
 #include <vector>
 #include <atomic>
+#include <thread>
 
 #define DISABLE_COPY_AND_ASSIGN(ClassType) \
   ClassType(const ClassType &) = delete;   \
@@ -48,6 +49,19 @@ static inline int SetEnv(const char *envname, const char *envvar, int overwrite
   return ::setenv(envname, envvar, overwrite);
 #endif
 }
 
+static inline void SetOMPThreadNum() {
+  size_t cpu_core_num = std::thread::hardware_concurrency();
+  size_t cpu_core_num_half = cpu_core_num / 2;
+  const size_t kOMPThreadMaxNum = 16;
+  const size_t kOMPThreadMinNum = 1;
+
+  size_t OMP_thread_num = cpu_core_num_half < kOMPThreadMinNum ? kOMPThreadMinNum : cpu_core_num_half;
+  OMP_thread_num = OMP_thread_num > kOMPThreadMaxNum ? kOMPThreadMaxNum : OMP_thread_num;
+
+  std::string OMP_env = std::to_string(OMP_thread_num);
+  SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0);
+}
 } // namespace common
 } // namespace mindspore
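The heuristic is half the reported hardware concurrency, clamped to [1, 16]; because SetEnv is called with overwrite = 0, it never replaces an OMP_NUM_THREADS the user exported themselves, and the lower bound of 1 also covers platforms where hardware_concurrency() returns 0. A small runnable check of the same arithmetic (standalone sketch, not the patch itself):

// clamp_check.cc - verifies the clamping used by SetOMPThreadNum():
// half the core count, bounded below by 1 and above by 16.
#include <algorithm>
#include <cstddef>
#include <cstdio>

size_t OmpThreadNum(size_t cpu_core_num) {
  const size_t kOMPThreadMaxNum = 16;
  const size_t kOMPThreadMinNum = 1;
  return std::min(std::max(cpu_core_num / 2, kOMPThreadMinNum), kOMPThreadMaxNum);
}

int main() {
  // 1 core -> 1 thread, 8 cores -> 4 threads, 64 cores -> capped at 16.
  const size_t samples[] = {1, 8, 64};
  for (size_t cores : samples) {
    std::printf("%zu cores -> %zu OMP threads\n", cores, OmpThreadNum(cores));
  }
  return 0;
}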
@@ -87,10 +87,11 @@ def run_e2e_dump():
     add = Net()
     add(Tensor(x), Tensor(y))
     time.sleep(5)
-    assert len(os.listdir(dump_file_path)) == 5
     if context.get_context("device_target") == "Ascend":
+        assert len(os.listdir(dump_file_path)) == 5
         output_name = "Add.Add-op1.0.0.*.output.0.DefaultFormat.npy"
     else:
+        assert len(os.listdir(dump_file_path)) == 3
         output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy"
     output_path = glob.glob(dump_file_path + output_name)[0]
     real_path = os.path.realpath(output_path)
@@ -116,6 +117,13 @@ def test_cpu_e2e_dump():
     context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
     run_e2e_dump()
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_gpu_e2e_dump():
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    run_e2e_dump()
+
 class ReluReduceMeanDenseRelu(Cell):
     def __init__(self, kernel, bias, in_channel, num_class):
         super().__init__()