From a25b84055ca103266b2cf994a3edeb7f1409b773 Mon Sep 17 00:00:00 2001 From: lizhenyu Date: Wed, 27 May 2020 14:52:16 +0800 Subject: [PATCH] refine data copy in multi-graph --- mindspore/ccsrc/session/gpu_session.cc | 44 ++++++++++++++++++++++++ mindspore/ccsrc/session/gpu_session.h | 3 ++ mindspore/ccsrc/session/session_basic.cc | 2 +- mindspore/ccsrc/utils/convert_utils.cc | 2 +- tests/st/ops/gpu/test_float_status_op.py | 40 ++++++++++++--------- 5 files changed, 73 insertions(+), 18 deletions(-) diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc index 89ab5788e77..725e2181d00 100644 --- a/mindspore/ccsrc/session/gpu_session.cc +++ b/mindspore/ccsrc/session/gpu_session.cc @@ -25,6 +25,7 @@ #include "device/kernel_runtime_manager.h" #include "predict/predict.h" #include "common/utils.h" +#include "common/trans.h" #include "utils/context/ms_context.h" namespace mindspore { @@ -83,6 +84,49 @@ void GPUSession::RunOpAllocateMemory(const std::vector &input runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph); } +void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, + const std::vector &inputs_const) const { + std::vector inputs(inputs_const); + MS_EXCEPTION_IF_NULL(kernel_graph); + auto input_nodes = kernel_graph->inputs(); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + + for (size_t i = 0; i < inputs.size(); ++i) { + auto tensor = inputs[i]; + MS_EXCEPTION_IF_NULL(tensor); + auto input_node = input_nodes[i]; + MS_EXCEPTION_IF_NULL(input_node); + if (input_node->isa() && AnfAlgo::OutputAddrExist(input_node, 0)) { + auto pk_node = input_node->cast(); + auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); + bool need_sync = false; + if (ms_context->enable_pynative_infer()) { + if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) { + need_sync = true; + } + } else { + if (tensor->is_dirty()) { + need_sync = true; + } else if (tensor->device_address() != device_address) { + AnfAlgo::SetOutputAddr(tensor->device_address(), 0, pk_node.get()); + need_sync = false; + } + } + if (need_sync) { + tensor->set_device_address(device_address); + MS_EXCEPTION_IF_NULL(device_address); + if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), + LongToSize(tensor->data().nbytes()), tensor->data_type(), + tensor->data_c(false))) { + MS_LOG(EXCEPTION) << "SyncHostToDevice failed."; + } + } + } + tensor->set_dirty(false); + } +} + void GPUSession::Execute(const std::shared_ptr &kernel_graph) const { auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); MS_EXCEPTION_IF_NULL(runtime_instance); diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/session/gpu_session.h index db320a3c884..b396e4a9ba1 100644 --- a/mindspore/ccsrc/session/gpu_session.h +++ b/mindspore/ccsrc/session/gpu_session.h @@ -59,6 +59,9 @@ class GPUSession : public SessionBasic { void RunOpAllocateMemory(const std::vector &input_tensors, KernelGraph *kernel_graph) const; + void LoadInputData(const std::shared_ptr &kernel_graph, + const std::vector &inputs_const) const override; + void Execute(const std::shared_ptr &kernel_graph) const; }; using GPUSessionPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc index 7cfe93dab06..0b5aaffe995 100644 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/session/session_basic.cc @@ -129,7 +129,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne // if in paynative mode,data only copyed to host when user want to print data auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); - if (ms_context->execution_mode() == kPynativeMode) { + if (ms_context->execution_mode() == kPynativeMode || ms_context->device_target() == kGPUDevice) { tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index)); tensor->set_dirty(false); } else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index), diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index d5bbc2603b9..f8c8ee3faa8 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -216,7 +216,7 @@ bool ValueToBool(const ValuePtr &v, bool *value) { } else if (v->isa()) { auto tensor = v->cast(); MS_EXCEPTION_IF_NULL(tensor); - + (void)tensor->data_sync(); bool *tensor_data = static_cast(tensor->data_c()); // maybe need to support if tensor is a bool array auto vb = tensor_data[0]; diff --git a/tests/st/ops/gpu/test_float_status_op.py b/tests/st/ops/gpu/test_float_status_op.py index 65689577c49..cdb71bf8ad0 100644 --- a/tests/st/ops/gpu/test_float_status_op.py +++ b/tests/st/ops/gpu/test_float_status_op.py @@ -70,13 +70,15 @@ x3 = np.array([[1, 2], [3, 4], [5.0, 88.0]]).astype(np.float32) def test_status(): ms_status = Net() output1 = ms_status(Tensor(x1)) - output2 = ms_status(Tensor(x2)) - output3 = ms_status(Tensor(x3)) expect1 = 1 - expect2 = 1 - expect3 = 0 assert output1.asnumpy()[0] == expect1 + + output2 = ms_status(Tensor(x2)) + expect2 = 1 assert output2.asnumpy()[0] == expect2 + + output3 = ms_status(Tensor(x3)) + expect3 = 0 assert output3.asnumpy()[0] == expect3 @@ -86,13 +88,15 @@ def test_status(): def test_nan(): ms_isnan = Netnan() output1 = ms_isnan(Tensor(x1)) - output2 = ms_isnan(Tensor(x2)) - output3 = ms_isnan(Tensor(x3)) expect1 = [[False, False, True, False]] - expect2 = [[False, False, False, False]] - expect3 = [[False, False], [False, False], [False, False]] assert (output1.asnumpy() == expect1).all() + + output2 = ms_isnan(Tensor(x2)) + expect2 = [[False, False, False, False]] assert (output2.asnumpy() == expect2).all() + + output3 = ms_isnan(Tensor(x3)) + expect3 = [[False, False], [False, False], [False, False]] assert (output3.asnumpy() == expect3).all() @@ -102,13 +106,15 @@ def test_nan(): def test_inf(): ms_isinf = Netinf() output1 = ms_isinf(Tensor(x1)) - output2 = ms_isinf(Tensor(x2)) - output3 = ms_isinf(Tensor(x3)) expect1 = [[False, False, False, False]] - expect2 = [[True, False, False, False]] - expect3 = [[False, False], [False, False], [False, False]] assert (output1.asnumpy() == expect1).all() + + output2 = ms_isinf(Tensor(x2)) + expect2 = [[True, False, False, False]] assert (output2.asnumpy() == expect2).all() + + output3 = ms_isinf(Tensor(x3)) + expect3 = [[False, False], [False, False], [False, False]] assert (output3.asnumpy() == expect3).all() @@ -118,11 +124,13 @@ def test_inf(): def test_finite(): ms_isfinite = Netfinite() output1 = ms_isfinite(Tensor(x1)) - output2 = ms_isfinite(Tensor(x2)) - output3 = ms_isfinite(Tensor(x3)) expect1 = [[True, True, False, True]] - expect2 = [[False, True, True, True]] - expect3 = [[True, True], [True, True], [True, True]] assert (output1.asnumpy() == expect1).all() + + output2 = ms_isfinite(Tensor(x2)) + expect2 = [[False, True, True, True]] assert (output2.asnumpy() == expect2).all() + + output3 = ms_isfinite(Tensor(x3)) + expect3 = [[True, True], [True, True], [True, True]] assert (output3.asnumpy() == expect3).all()