!27929 consider the case where inputs and outputs of a host kernel are on CPU

Merge pull request !27929 from lingyunli63/refine_memcpy_of_host_kernel
i-robot 2021-12-23 08:01:51 +00:00 committed by Gitee
commit 1a474138da
6 changed files with 130 additions and 26 deletions
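The common thread in this change: host kernels (DynamicShape, DynamicReshape, DynamicBroadcastGradientArgs) compute their results on the host, so when the relevant device addresses are already CPU-resident the kernels can finish with a bounds-checked memcpy_s instead of routing the bytes through the Ascend sync paths. A minimal, framework-free sketch of that dispatch pattern follows; every type in it is a hypothetical stand-in, not MindSpore's real DeviceAddress API.

#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>

// Hypothetical stand-ins for MindSpore's device-address machinery; only the
// branching pattern mirrors the patch.
enum class DeviceAddressType { kCPU, kAscend };

struct Address {
  DeviceAddressType type;
  std::vector<char> buf;  // stand-in for the raw pointer/size pair
};

// memcpy_s-style copy: fail on a too-small destination instead of overflowing.
void CopyChecked(Address *dst, const Address &src, size_t nbytes) {
  if (nbytes > dst->buf.size()) throw std::runtime_error("dest buffer too small");
  std::memcpy(dst->buf.data(), src.buf.data(), nbytes);
}

void ExecuteHostKernel(Address *output, const Address &input, size_t nbytes) {
  if (input.type == DeviceAddressType::kCPU && output->type == DeviceAddressType::kCPU) {
    CopyChecked(output, input, nbytes);  // new fast path: plain host copy
  } else {
    // Where the real kernels build a temporary AscendDeviceAddress and call
    // SyncDeviceToDevice / SyncHostToDevice through the device runtime.
    std::cout << "sync via device runtime\n";
  }
}

int main() {
  Address in{DeviceAddressType::kCPU, std::vector<char>(16, 1)};
  Address out{DeviceAddressType::kCPU, std::vector<char>(16, 0)};
  ExecuteHostKernel(&out, in, in.buf.size());
  std::cout << static_cast<int>(out.buf[0]) << "\n";  // prints 1: the bytes arrived
}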


@@ -99,17 +99,24 @@ void DynamicReshapeKernel::Execute() {
size_t input_size_byte = LongToSize(arr_prod) * abstract::TypeIdSize(type_x);
auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0);
MS_EXCEPTION_IF_NULL(output_addr);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto temp_device_address = std::make_shared<device::ascend::AscendDeviceAddress>(
address_x->GetMutablePtr(), input_size_byte, address_x->format(), address_x->type_id(), kAscendDevice, device_id);
if (!output_addr->SyncDeviceToDevice(temp_device_address.get())) {
MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed.";
if (address_x->DeviceType() == device::DeviceAddressType::kCPU) {
auto ret =
memcpy_s(const_cast<void *>(output_addr->GetPtr()), output_addr->GetSize(), address_x->GetPtr(), input_size_byte);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Execute DynamicReshapeKernel memcpy_s failed";
}
} else {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto temp_device_address = std::make_shared<device::ascend::AscendDeviceAddress>(
address_x->GetMutablePtr(), input_size_byte, address_x->format(), address_x->type_id(), kAscendDevice, device_id);
if (!output_addr->SyncDeviceToDevice(temp_device_address.get())) {
MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed.";
}
MS_LOG(INFO) << "Execute host ReshapeKernel End";
}
MS_LOG(INFO) << "Execute host ReshapeKernel End";
}
device::DynamicKernelPtr DynamicReshapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
return std::make_shared<DynamicReshapeKernel>(stream_ptr, cnode_ptr);
}
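A note on reading the flattened hunk above: both versions are shown. The run from auto ms_context through the SyncDeviceToDevice check is the old unconditional body, which survives verbatim as the new else branch; CPU-resident inputs now take the memcpy_s branch instead. memcpy_s is the bounds-checked copy (the destination capacity is its second argument) whose errno_t result is EOK on success, hence the ret != EOK check.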


@@ -47,16 +47,24 @@ void DynamicShapeKernel::Execute() {
auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0);
MS_EXCEPTION_IF_NULL(output_addr);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
MS_EXCEPTION_IF_NULL(runtime_instance);
auto ret = runtime_instance->SyncStream();
if (!ret) {
MS_LOG(EXCEPTION) << "Sync stream error!";
if (output_addr->DeviceType() == device::DeviceAddressType::kCPU) {
auto ret = memcpy_s(const_cast<void *>(output_addr->GetPtr()), output_addr->GetSize(),
output_tensor_for_sync->data_c(), LongToSize(output_tensor_for_sync->data().nbytes()));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Execute DynamicShapeKernel memcpy_s failed!";
}
} else {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
MS_EXCEPTION_IF_NULL(runtime_instance);
auto ret = runtime_instance->SyncStream();
if (!ret) {
MS_LOG(EXCEPTION) << "Sync stream error!";
}
output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(),
output_tensor_for_sync->device_info().host_format_);
}
output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(),
output_tensor_for_sync->device_info().host_format_);
MS_LOG(INFO) << "Execute DynamicShapeKernel End";
}
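The shape kernel follows the same pattern: the runtime shape has already been materialized on the host in output_tensor_for_sync, so a CPU output address only needs the checked copy, while a device output still synchronizes the stream and goes through SyncHostToDevice. As in the previous file, the flattened view shows the SyncHostToDevice call twice: the re-indented copy inside the new else branch and the old top-level call it replaces. A framework-free sketch of what gets copied, with hypothetical buffers standing in for the tensor and the output address:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  // The input's runtime shape, e.g. one step of a (32, None) dynamic column.
  std::vector<int64_t> runtime_shape = {32, 6};
  size_t nbytes = runtime_shape.size() * sizeof(int64_t);
  std::vector<int64_t> output(runtime_shape.size());       // CPU output buffer
  if (nbytes > output.size() * sizeof(int64_t)) return 1;  // memcpy_s-style capacity guard
  std::memcpy(output.data(), runtime_shape.data(), nbytes);
  for (auto d : output) std::cout << d << ' ';             // prints: 32 6
  std::cout << '\n';
}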


@@ -24,20 +24,15 @@
namespace mindspore {
namespace kernel {
static const std::set<std::string> host_kernel = {
prim::kPrimDynamicShape->name(), prim::kPrimDynamicBroadcastGradientArgs->name(), prim::kPrimDynamicReshape->name()};
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
MS_LOG(INFO) << "HostMetadataInfo.";
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
if (host_kernel.find(op_name) == host_kernel.end()) {
MS_LOG(DEBUG) << "Host dose not have op [" << op_name << "]";
if (!AnfAlgo::IsHostKernel(kernel_node)) {
MS_LOG(DEBUG) << "Host dose not have op [" << kernel_node->DebugString() << "]";
return;
}
std::vector<std::string> inputs_format{};
std::vector<TypeId> inputs_type{};
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);


@@ -2200,6 +2200,39 @@ void AnfRuntimeAlgorithm::GetAllFatherRealNode(const AnfNodePtr &anf_node, std::
}
}
bool AnfRuntimeAlgorithm::IsHostKernel(const CNodePtr &kernel_node) {
const std::set<std::string> host_kernel = {prim::kPrimDynamicShape->name(), prim::kPrimDynamicReshape->name(),
prim::kPrimDynamicBroadcastGradientArgs->name()};
auto op_name = AnfAlgo::GetCNodeName(kernel_node);
if (host_kernel.find(op_name) == host_kernel.end()) {
return false;
}
return true;
}
namespace {
// Host kernel with inputs on host
bool SkipDataSync(const CNodePtr &node, const std::map<uint32_t, tensor::TensorPtr> &depend_tensors) {
if (!AnfAlgo::IsHostKernel(node)) {
return false;
}
auto input_size = AnfAlgo::GetInputTensorNum(node);
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
auto real_input = input_with_index.first;
auto iter_tensor = depend_tensors.find(i);
if (iter_tensor != depend_tensors.end()) {
auto output_addr = AnfAlgo::GetOutputAddr(real_input, 0);
MS_EXCEPTION_IF_NULL(output_addr);
if (output_addr->DeviceType() != device::DeviceAddressType::kCPU) {
return false;
}
}
}
return true;
}
} // namespace
void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors) {
MS_EXCEPTION_IF_NULL(node);
MS_LOG(INFO) << "InferShape start, node:" << node->DebugString();
@@ -2222,8 +2255,10 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
if (iter_tensor != depend_tensors->end()) {
auto tensor_ptr = iter_tensor->second;
MS_EXCEPTION_IF_NULL(tensor_ptr);
// sync data from device to host
tensor_ptr->data_sync();
if (!SkipDataSync(node, *depend_tensors)) {
// sync data from device to host
tensor_ptr->data_sync();
}
auto real_abs = real_input->abstract();
if (real_abs->isa<abstract::AbstractTensor>()) {
real_input->abstract()->set_value(tensor_ptr);
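Two things to note in this file. IsHostKernel centralizes the op set that host_kernel_metadata.cc previously kept to itself (the earlier hunk drops that file's local host_kernel set and op-name lookup in favor of the new call), so one membership test now backs both kernel-info selection and SkipDataSync. And SkipDataSync is what the guarded data_sync() above pays off: tensor_ptr->data_sync() issues a device-to-host copy for each depend tensor, which is wasted work once a host kernel's depended-on outputs already live on the CPU. A runnable sketch of the predicate, with a plain map of device types standing in for the node and depend_tensors plumbing:

#include <cstdint>
#include <iostream>
#include <map>

enum class DeviceAddressType { kCPU, kAscend };

// Skip the device-to-host sync only if this is a host kernel and every depend
// input we track is already CPU-resident; any device-resident input forces a sync.
bool SkipDataSync(bool is_host_kernel, const std::map<uint32_t, DeviceAddressType> &depend_inputs) {
  if (!is_host_kernel) return false;
  for (const auto &kv : depend_inputs) {
    if (kv.second != DeviceAddressType::kCPU) return false;
  }
  return true;
}

int main() {
  std::cout << SkipDataSync(true, {{0, DeviceAddressType::kCPU}}) << '\n';     // 1: copy skipped
  std::cout << SkipDataSync(true, {{0, DeviceAddressType::kAscend}}) << '\n';  // 0: must data_sync()
  std::cout << SkipDataSync(false, {{0, DeviceAddressType::kCPU}}) << '\n';    // 0: not a host kernel
}

Note that the real predicate only inspects inputs that actually appear in depend_tensors; indices missing from the map are not checked and do not block the skip.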


@@ -297,6 +297,7 @@ class AnfRuntimeAlgorithm {
static std::vector<int64_t> GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index);
static std::vector<int64_t> GetOutputMinShape(const AnfNodePtr &anf_node, size_t index);
static bool IsNodeDynamicShape(const AnfNodePtr &node);
static bool IsHostKernel(const CNodePtr &node);
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
static void AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
const AnfNodePtr &real_input, size_t index);


@@ -0,0 +1,58 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore.ops import operations as P
from mindspore import Model
context.set_context(mode=context.GRAPH_MODE,
device_target="Ascend")
def dataset_generator():
for i in range(1, 10):
yield(np.ones((32, 2*i), dtype=np.float32), np.ones((32, 2*i), dtype=np.float32))
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.unique = P.Unique()
self.shape = P.DynamicShape()
self.reshape = P.Reshape()
self.add = P.Add()
def construct(self, x, y):
val = self.add(x, y)
size = self.shape(val)
res = self.reshape(val, size)
return res
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_shape():
"""
Feature: dynamic shape
Description: dynamic shape input data set
Expectation: success
"""
network = Net()
dataset = ds.GeneratorDataset(dataset_generator, ["data1", "data2"])
dataset.set_dynamic_columns(columns={"data1": [32, None], "data2": [32, None]})
model = Model(network)
model.train(1, dataset, sink_size=1)
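The new test drives exactly this path: dataset_generator yields pairs whose second dimension grows from 2 to 18, set_dynamic_columns marks that dimension as dynamic, and the network's DynamicShape plus Reshape then run as host kernels during the single sink-size-1 training step, exercising the refined copy paths added above.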