forked from mindspore-Ecosystem/mindspore
!27929 consider the case where inputs and outputs of a host kernel are on CPU
Merge pull request !27929 from lingyunli63/refine_memcpy_of_host_kernel
commit 1a474138da
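In short, this change makes the host-side kernels (DynamicShape, DynamicReshape) check where their input and output device addresses actually live before moving data: if they are already on CPU, a plain memcpy_s is enough; otherwise the existing Ascend device-sync path is used. A minimal sketch of that dispatch follows, under the assumption of a simplified DeviceAddress interface; the CopyHostKernelOutput helper and the std::function fallback are illustrative only and not part of this PR.

// Minimal sketch (not MindSpore code): DeviceAddress, its fields and the
// CopyHostKernelOutput helper are assumptions used only to illustrate the dispatch.
#include <cstddef>
#include <cstring>
#include <functional>
#include <stdexcept>

enum class DeviceAddressType { kCPU, kAscend };

struct DeviceAddress {
  DeviceAddressType device_type;
  void *ptr;
  size_t size;
  DeviceAddressType DeviceType() const { return device_type; }
  void *GetPtr() const { return ptr; }
  size_t GetSize() const { return size; }
};

// If the input already lives on CPU, copy host-to-host; otherwise fall back to the
// device synchronization path (SyncDeviceToDevice / SyncHostToDevice in the diff below).
void CopyHostKernelOutput(const DeviceAddress &input, DeviceAddress *output,
                          const std::function<void()> &device_sync) {
  if (input.DeviceType() == DeviceAddressType::kCPU) {
    if (output->GetSize() < input.GetSize()) {
      throw std::runtime_error("output buffer too small");
    }
    std::memcpy(output->GetPtr(), input.GetPtr(), input.GetSize());
  } else {
    device_sync();
  }
}

The actual diff applies this branch separately in DynamicReshapeKernel::Execute and DynamicShapeKernel::Execute, and InferShape additionally skips the device-to-host data_sync when all depend inputs of a host kernel are already on CPU (SkipDataSync).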
@@ -99,17 +99,24 @@ void DynamicReshapeKernel::Execute() {
size_t input_size_byte = LongToSize(arr_prod) * abstract::TypeIdSize(type_x);
auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0);
MS_EXCEPTION_IF_NULL(output_addr);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto temp_device_address = std::make_shared<device::ascend::AscendDeviceAddress>(
address_x->GetMutablePtr(), input_size_byte, address_x->format(), address_x->type_id(), kAscendDevice, device_id);
if (!output_addr->SyncDeviceToDevice(temp_device_address.get())) {
MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed.";
if (address_x->DeviceType() == device::DeviceAddressType::kCPU) {
auto ret =
memcpy_s(const_cast<void *>(output_addr->GetPtr()), output_addr->GetSize(), address_x->GetPtr(), input_size_byte);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Execute DynamicReshapeKernel memcpy_s failed";
}
} else {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto temp_device_address = std::make_shared<device::ascend::AscendDeviceAddress>(
address_x->GetMutablePtr(), input_size_byte, address_x->format(), address_x->type_id(), kAscendDevice, device_id);
if (!output_addr->SyncDeviceToDevice(temp_device_address.get())) {
MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed.";
}
MS_LOG(INFO) << "Execute host ReshapeKernel End";
}
MS_LOG(INFO) << "Execute host ReshapeKernel End";
}

device::DynamicKernelPtr DynamicReshapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
return std::make_shared<DynamicReshapeKernel>(stream_ptr, cnode_ptr);
}

@@ -47,16 +47,24 @@ void DynamicShapeKernel::Execute() {
auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0);
MS_EXCEPTION_IF_NULL(output_addr);

auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
MS_EXCEPTION_IF_NULL(runtime_instance);
auto ret = runtime_instance->SyncStream();
if (!ret) {
MS_LOG(EXCEPTION) << "Sync stream error!";
if (output_addr->DeviceType() == device::DeviceAddressType::kCPU) {
auto ret = memcpy_s(const_cast<void *>(output_addr->GetPtr()), output_addr->GetSize(),
output_tensor_for_sync->data_c(), LongToSize(output_tensor_for_sync->data().nbytes()));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Execute DynamicShapeKernel memcpy_s failed!";
}
} else {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
MS_EXCEPTION_IF_NULL(runtime_instance);
auto ret = runtime_instance->SyncStream();
if (!ret) {
MS_LOG(EXCEPTION) << "Sync stream error!";
}
output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(),
output_tensor_for_sync->device_info().host_format_);
}

output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(),
output_tensor_for_sync->device_info().host_format_);
MS_LOG(INFO) << "Execute DynamicShapeKernel End";
}

@@ -24,20 +24,15 @@

namespace mindspore {
namespace kernel {
static const std::set<std::string> host_kernel = {
prim::kPrimDynamicShape->name(), prim::kPrimDynamicBroadcastGradientArgs->name(), prim::kPrimDynamicReshape->name()};

void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
MS_LOG(INFO) << "HostMetadataInfo.";
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);

std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
if (host_kernel.find(op_name) == host_kernel.end()) {
MS_LOG(DEBUG) << "Host does not have op [" << op_name << "]";
if (!AnfAlgo::IsHostKernel(kernel_node)) {
MS_LOG(DEBUG) << "Host does not have op [" << kernel_node->DebugString() << "]";
return;
}

std::vector<std::string> inputs_format{};
std::vector<TypeId> inputs_type{};
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);

@@ -2200,6 +2200,39 @@ void AnfRuntimeAlgorithm::GetAllFatherRealNode(const AnfNodePtr &anf_node, std::
}
}

bool AnfRuntimeAlgorithm::IsHostKernel(const CNodePtr &kernel_node) {
const std::set<std::string> host_kernel = {prim::kPrimDynamicShape->name(), prim::kPrimDynamicReshape->name(),
prim::kPrimDynamicBroadcastGradientArgs->name()};
auto op_name = AnfAlgo::GetCNodeName(kernel_node);
if (host_kernel.find(op_name) == host_kernel.end()) {
return false;
}
return true;
}

namespace {
// Host kernel with inputs on host
bool SkipDataSync(const CNodePtr &node, const std::map<uint32_t, tensor::TensorPtr> &depend_tensors) {
if (!AnfAlgo::IsHostKernel(node)) {
return false;
}
auto input_size = AnfAlgo::GetInputTensorNum(node);
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
auto real_input = input_with_index.first;
auto iter_tensor = depend_tensors.find(i);
if (iter_tensor != depend_tensors.end()) {
auto output_addr = AnfAlgo::GetOutputAddr(real_input, 0);
MS_EXCEPTION_IF_NULL(output_addr);
if (output_addr->DeviceType() != device::DeviceAddressType::kCPU) {
return false;
}
}
}
return true;
}
}  // namespace

void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors) {
MS_EXCEPTION_IF_NULL(node);
MS_LOG(INFO) << "InferShape start, node:" << node->DebugString();

@@ -2222,8 +2255,10 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
if (iter_tensor != depend_tensors->end()) {
auto tensor_ptr = iter_tensor->second;
MS_EXCEPTION_IF_NULL(tensor_ptr);
// sync data from device to host
tensor_ptr->data_sync();
if (!SkipDataSync(node, *depend_tensors)) {
// sync data from device to host
tensor_ptr->data_sync();
}
auto real_abs = real_input->abstract();
if (real_abs->isa<abstract::AbstractTensor>()) {
real_input->abstract()->set_value(tensor_ptr);

@@ -297,6 +297,7 @@ class AnfRuntimeAlgorithm {
static std::vector<int64_t> GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index);
static std::vector<int64_t> GetOutputMinShape(const AnfNodePtr &anf_node, size_t index);
static bool IsNodeDynamicShape(const AnfNodePtr &node);
static bool IsHostKernel(const CNodePtr &node);
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
static void AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
const AnfNodePtr &real_input, size_t index);

@@ -0,0 +1,58 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore.ops import operations as P
from mindspore import Model

context.set_context(mode=context.GRAPH_MODE,
                    device_target="Ascend")

def dataset_generator():
    for i in range(1, 10):
        yield (np.ones((32, 2*i), dtype=np.float32), np.ones((32, 2*i), dtype=np.float32))

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.unique = P.Unique()
        self.shape = P.DynamicShape()
        self.reshape = P.Reshape()
        self.add = P.Add()

    def construct(self, x, y):
        val = self.add(x, y)
        size = self.shape(val)
        res = self.reshape(val, size)
        return res

@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_shape():
    """
    Feature: dynamic shape
    Description: dynamic shape input data set
    Expectation: success
    """
    network = Net()
    dataset = ds.GeneratorDataset(dataset_generator, ["data1", "data2"])
    dataset.set_dynamic_columns(columns={"data1": [32, None], "data2": [32, None]})
    model = Model(network)
    model.train(1, dataset, sink_size=1)