forked from mindspore-Ecosystem/mindspore
refine data copy in multi-graph
This commit is contained in:
parent
02f33a17b5
commit
a25b84055c
|
@ -25,6 +25,7 @@
|
||||||
#include "device/kernel_runtime_manager.h"
|
#include "device/kernel_runtime_manager.h"
|
||||||
#include "predict/predict.h"
|
#include "predict/predict.h"
|
||||||
#include "common/utils.h"
|
#include "common/utils.h"
|
||||||
|
#include "common/trans.h"
|
||||||
#include "utils/context/ms_context.h"
|
#include "utils/context/ms_context.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
@ -83,6 +84,49 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input
|
||||||
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
|
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||||
|
const std::vector<tensor::TensorPtr> &inputs_const) const {
|
||||||
|
std::vector<tensor::TensorPtr> inputs(inputs_const);
|
||||||
|
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||||
|
auto input_nodes = kernel_graph->inputs();
|
||||||
|
auto ms_context = MsContext::GetInstance();
|
||||||
|
MS_EXCEPTION_IF_NULL(ms_context);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < inputs.size(); ++i) {
|
||||||
|
auto tensor = inputs[i];
|
||||||
|
MS_EXCEPTION_IF_NULL(tensor);
|
||||||
|
auto input_node = input_nodes[i];
|
||||||
|
MS_EXCEPTION_IF_NULL(input_node);
|
||||||
|
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
|
||||||
|
auto pk_node = input_node->cast<ParameterPtr>();
|
||||||
|
auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
|
||||||
|
bool need_sync = false;
|
||||||
|
if (ms_context->enable_pynative_infer()) {
|
||||||
|
if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) {
|
||||||
|
need_sync = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (tensor->is_dirty()) {
|
||||||
|
need_sync = true;
|
||||||
|
} else if (tensor->device_address() != device_address) {
|
||||||
|
AnfAlgo::SetOutputAddr(tensor->device_address(), 0, pk_node.get());
|
||||||
|
need_sync = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (need_sync) {
|
||||||
|
tensor->set_device_address(device_address);
|
||||||
|
MS_EXCEPTION_IF_NULL(device_address);
|
||||||
|
if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0),
|
||||||
|
LongToSize(tensor->data().nbytes()), tensor->data_type(),
|
||||||
|
tensor->data_c(false))) {
|
||||||
|
MS_LOG(EXCEPTION) << "SyncHostToDevice failed.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tensor->set_dirty(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
|
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
|
||||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||||
|
|
|
@ -59,6 +59,9 @@ class GPUSession : public SessionBasic {
|
||||||
|
|
||||||
void RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const;
|
void RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors, KernelGraph *kernel_graph) const;
|
||||||
|
|
||||||
|
void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||||
|
const std::vector<tensor::TensorPtr> &inputs_const) const override;
|
||||||
|
|
||||||
void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||||
};
|
};
|
||||||
using GPUSessionPtr = std::shared_ptr<GPUSession>;
|
using GPUSessionPtr = std::shared_ptr<GPUSession>;
|
||||||
|
|
|
@ -129,7 +129,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne
|
||||||
// if in pynative mode, data is only copied to host when the user wants to print data
|
// if in pynative mode, data is only copied to host when the user wants to print data
|
||||||
auto ms_context = MsContext::GetInstance();
|
auto ms_context = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(ms_context);
|
MS_EXCEPTION_IF_NULL(ms_context);
|
||||||
if (ms_context->execution_mode() == kPynativeMode) {
|
if (ms_context->execution_mode() == kPynativeMode || ms_context->device_target() == kGPUDevice) {
|
||||||
tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index));
|
tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index));
|
||||||
tensor->set_dirty(false);
|
tensor->set_dirty(false);
|
||||||
} else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index),
|
} else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index),
|
||||||
|
|
|
@ -216,7 +216,7 @@ bool ValueToBool(const ValuePtr &v, bool *value) {
|
||||||
} else if (v->isa<tensor::Tensor>()) {
|
} else if (v->isa<tensor::Tensor>()) {
|
||||||
auto tensor = v->cast<tensor::TensorPtr>();
|
auto tensor = v->cast<tensor::TensorPtr>();
|
||||||
MS_EXCEPTION_IF_NULL(tensor);
|
MS_EXCEPTION_IF_NULL(tensor);
|
||||||
|
(void)tensor->data_sync();
|
||||||
bool *tensor_data = static_cast<bool *>(tensor->data_c());
|
bool *tensor_data = static_cast<bool *>(tensor->data_c());
|
||||||
// maybe need to support if tensor is a bool array
|
// maybe need to support if tensor is a bool array
|
||||||
auto vb = tensor_data[0];
|
auto vb = tensor_data[0];
|
||||||
|
|
|
@ -70,13 +70,15 @@ x3 = np.array([[1, 2], [3, 4], [5.0, 88.0]]).astype(np.float32)
|
||||||
def test_status():
    """Run ``Net`` on x1, x2 and x3 in turn and check element 0 of each output."""
    ms_status = Net()
    # Each output is verified before the next run is launched.
    # NOTE(review): presumably a later run may reuse the device memory behind an earlier output - confirm.
    for data, expected in ((x1, 1), (x2, 1), (x3, 0)):
        result = ms_status(Tensor(data))
        assert result.asnumpy()[0] == expected
|
||||||
|
|
||||||
|
|
||||||
|
@ -86,13 +88,15 @@ def test_status():
|
||||||
def test_nan():
    """Run ``Netnan`` on x1, x2 and x3 in turn and compare each output with its expected boolean mask."""
    ms_isnan = Netnan()
    cases = (
        (x1, [[False, False, True, False]]),
        (x2, [[False, False, False, False]]),
        (x3, [[False, False], [False, False], [False, False]]),
    )
    # Each output is verified before the next run is launched.
    # NOTE(review): presumably a later run may reuse the device memory behind an earlier output - confirm.
    for data, expected in cases:
        result = ms_isnan(Tensor(data))
        assert (result.asnumpy() == expected).all()
|
||||||
|
|
||||||
|
|
||||||
|
@ -102,13 +106,15 @@ def test_nan():
|
||||||
def test_inf():
    """Run ``Netinf`` on x1, x2 and x3 in turn and compare each output with its expected boolean mask."""
    ms_isinf = Netinf()
    cases = (
        (x1, [[False, False, False, False]]),
        (x2, [[True, False, False, False]]),
        (x3, [[False, False], [False, False], [False, False]]),
    )
    # Each output is verified before the next run is launched.
    # NOTE(review): presumably a later run may reuse the device memory behind an earlier output - confirm.
    for data, expected in cases:
        result = ms_isinf(Tensor(data))
        assert (result.asnumpy() == expected).all()
|
||||||
|
|
||||||
|
|
||||||
|
@ -118,11 +124,13 @@ def test_inf():
|
||||||
def test_finite():
    """Run ``Netfinite`` on x1, x2 and x3 in turn and compare each output with its expected boolean mask."""
    ms_isfinite = Netfinite()
    cases = (
        (x1, [[True, True, False, True]]),
        (x2, [[False, True, True, True]]),
        (x3, [[True, True], [True, True], [True, True]]),
    )
    # Each output is verified before the next run is launched.
    # NOTE(review): presumably a later run may reuse the device memory behind an earlier output - confirm.
    for data, expected in cases:
        result = ms_isfinite(Tensor(data))
        assert (result.asnumpy() == expected).all()
|
||||||
|
|
Loading…
Reference in New Issue