diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc
index be3a8881b27..1622adfc36c 100644
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++ b/mindspore/ccsrc/pipeline/jit/init.cc
@@ -26,6 +26,7 @@
 #include "utils/summary/event_writer.h"
 #include "utils/config_manager.h"
 #include "utils/mpi/mpi_config.h"
+#include "utils/ms_utils.h"
 #include "frontend/parallel/context.h"
 #include "frontend/parallel/costmodel_context.h"
 #ifdef ENABLE_GPU_COLLECTIVE
@@ -57,6 +58,9 @@ using PSContext = mindspore::ps::PSContext;
 
 // Interface with python
 PYBIND11_MODULE(_c_expression, m) {
+  // Setting OMP_NUM_THREADS in the backend has no effect, so set it here in advance.
+  mindspore::common::SetOMPThreadNum();
+
   m.doc() = "MindSpore c plugin";
 
   auto fns = mindspore::PybindDefineRegister::AllFuncs();
diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc
index 38241fa17cc..7eb82a04650 100644
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@@ -567,9 +567,14 @@ void SetMindRTEnable() {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   std::string target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (target != kGPUDevice) {
+  if ((target != kGPUDevice) && (target != kCPUDevice)) {
     return;
   }
+
+#if defined(_WIN32) || defined(_WIN64)
+  return;
+#endif
+
 #if (ENABLE_CPU && !_WIN32)
   if (ps::PSContext::instance()->is_ps_mode()) {
     return;
diff --git a/mindspore/core/utils/ms_utils.h b/mindspore/core/utils/ms_utils.h
index cccfb117f22..0982584a190 100644
--- a/mindspore/core/utils/ms_utils.h
+++ b/mindspore/core/utils/ms_utils.h
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <thread>
 
 #define DISABLE_COPY_AND_ASSIGN(ClassType) \
   ClassType(const ClassType &) = delete;   \
@@ -48,6 +49,19 @@ static inline int SetEnv(const char *envname, const char *envvar, int overwrite
   return ::setenv(envname, envvar, overwrite);
 #endif
 }
+
+static inline void SetOMPThreadNum() {
+  size_t cpu_core_num = std::thread::hardware_concurrency();
+  size_t cpu_core_num_half = cpu_core_num / 2;
+  const size_t kOMPThreadMaxNum = 16;
+  const size_t kOMPThreadMinNum = 1;
+
+  size_t OMP_thread_num = cpu_core_num_half < kOMPThreadMinNum ? kOMPThreadMinNum : cpu_core_num_half;
+  OMP_thread_num = OMP_thread_num > kOMPThreadMaxNum ? kOMPThreadMaxNum : OMP_thread_num;
+
+  std::string OMP_env = std::to_string(OMP_thread_num);
+  SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0);
+}
 }  // namespace common
 }  // namespace mindspore
diff --git a/tests/st/dump/test_data_dump.py b/tests/st/dump/test_data_dump.py
index 6c6dae8c014..1da98e73942 100644
--- a/tests/st/dump/test_data_dump.py
+++ b/tests/st/dump/test_data_dump.py
@@ -87,10 +87,11 @@ def run_e2e_dump():
     add = Net()
     add(Tensor(x), Tensor(y))
     time.sleep(5)
-    assert len(os.listdir(dump_file_path)) == 5
     if context.get_context("device_target") == "Ascend":
+        assert len(os.listdir(dump_file_path)) == 5
         output_name = "Add.Add-op1.0.0.*.output.0.DefaultFormat.npy"
     else:
+        assert len(os.listdir(dump_file_path)) == 3
         output_name = "Add.Add-op3.0.0.*.output.0.DefaultFormat.npy"
     output_path = glob.glob(dump_file_path + output_name)[0]
     real_path = os.path.realpath(output_path)
@@ -116,6 +117,13 @@ def test_cpu_e2e_dump():
     context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
     run_e2e_dump()
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_gpu_e2e_dump():
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    run_e2e_dump()
+
 class ReluReduceMeanDenseRelu(Cell):
     def __init__(self, kernel, bias, in_channel, num_class):
         super().__init__()
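For reviewers, the rule implemented by `SetOMPThreadNum()` is: take half of `std::thread::hardware_concurrency()`, clamp it to the range [1, 16], and export the result as `OMP_NUM_THREADS`. Below is a minimal standalone sketch of that clamping rule, assuming C++17 for `std::clamp`; the helper name `ClampedOMPThreadNum` is hypothetical, introduced only for illustration, and is not part of the patch or of MindSpore's API.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <thread>

// Sketch of the rule in SetOMPThreadNum(): half the hardware threads,
// clamped to [1, 16]. Equivalent to the two ternaries in the patch.
static size_t ClampedOMPThreadNum(size_t cpu_core_num) {
  const size_t kOMPThreadMaxNum = 16;
  const size_t kOMPThreadMinNum = 1;
  return std::clamp(cpu_core_num / 2, kOMPThreadMinNum, kOMPThreadMaxNum);
}

int main() {
  // hardware_concurrency() may return 0; the clamp still yields 1 in that case.
  for (size_t cores : {0, 1, 8, 32, 128}) {
    std::cout << cores << " cores -> " << ClampedOMPThreadNum(cores)
              << " OMP threads\n";
  }
  std::cout << "this machine -> "
            << ClampedOMPThreadNum(std::thread::hardware_concurrency())
            << " OMP threads\n";
  return 0;
}
```

Note that the patch calls `SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0)` with `overwrite = 0`, which on POSIX maps to `setenv(..., 0)` and leaves an already-exported `OMP_NUM_THREADS` untouched; the patch supplies a default rather than a hard override.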