From e54aabf53dc915f1e9a7384963b0a36e792b393a Mon Sep 17 00:00:00 2001 From: lingyunli63 Date: Mon, 20 Dec 2021 11:56:26 +0800 Subject: [PATCH] refine infershape and execute --- .../host/dynamic_reshape_kernel.cc | 25 +++++--- .../host/dynamic_shape_kernel.cc | 24 +++++--- .../host/host_kernel_metadata.cc | 9 +-- .../backend/session/anf_runtime_algorithm.cc | 39 ++++++++++++- .../backend/session/anf_runtime_algorithm.h | 1 + tests/st/ops/ascend/test_shape.py | 58 +++++++++++++++++++ 6 files changed, 130 insertions(+), 26 deletions(-) create mode 100644 tests/st/ops/ascend/test_shape.py diff --git a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_reshape_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_reshape_kernel.cc index 463f71aed5c..5030ae9e82b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_reshape_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_reshape_kernel.cc @@ -99,17 +99,24 @@ void DynamicReshapeKernel::Execute() { size_t input_size_byte = LongToSize(arr_prod) * abstract::TypeIdSize(type_x); auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0); MS_EXCEPTION_IF_NULL(output_addr); - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - auto device_id = ms_context->get_param(MS_CTX_DEVICE_ID); - auto temp_device_address = std::make_shared( - address_x->GetMutablePtr(), input_size_byte, address_x->format(), address_x->type_id(), kAscendDevice, device_id); - if (!output_addr->SyncDeviceToDevice(temp_device_address.get())) { - MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed."; + if (address_x->DeviceType() == device::DeviceAddressType::kCPU) { + auto ret = + memcpy_s(const_cast(output_addr->GetPtr()), output_addr->GetSize(), address_x->GetPtr(), input_size_byte); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "Execute DynamicReshapeKernel memcpy_s failed"; + } + } else { + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + auto 
device_id = ms_context->get_param(MS_CTX_DEVICE_ID); + auto temp_device_address = std::make_shared( + address_x->GetMutablePtr(), input_size_byte, address_x->format(), address_x->type_id(), kAscendDevice, device_id); + if (!output_addr->SyncDeviceToDevice(temp_device_address.get())) { + MS_LOG(EXCEPTION) << "Host Reshape sync device to device failed."; + } + MS_LOG(INFO) << "Execute host ReshapeKernel End"; } - MS_LOG(INFO) << "Execute host ReshapeKernel End"; } - device::DynamicKernelPtr DynamicReshapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { return std::make_shared(stream_ptr, cnode_ptr); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_shape_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_shape_kernel.cc index 226da9e5a34..2f811b8733f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_shape_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/host/dynamic_shape_kernel.cc @@ -47,16 +47,24 @@ void DynamicShapeKernel::Execute() { auto output_addr = AnfAlgo::GetOutputAddr(cnode, 0); MS_EXCEPTION_IF_NULL(output_addr); - auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime(); - MS_EXCEPTION_IF_NULL(runtime_instance); - auto ret = runtime_instance->SyncStream(); - if (!ret) { - MS_LOG(EXCEPTION) << "Sync stream error!"; + if (output_addr->DeviceType() == device::DeviceAddressType::kCPU) { + auto ret = memcpy_s(const_cast(output_addr->GetPtr()), output_addr->GetSize(), + output_tensor_for_sync->data_c(), LongToSize(output_tensor_for_sync->data().nbytes())); + if (ret != EOK) { + MS_LOG(EXCEPTION) << "Execute DynamicShapeKernel memcpy_s failed!"; + } + } else { + auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime(); + MS_EXCEPTION_IF_NULL(runtime_instance); + auto ret = runtime_instance->SyncStream(); + if (!ret) { + MS_LOG(EXCEPTION) << "Sync stream error!"; + } + output_addr->SyncHostToDevice(output_shape, 
LongToSize(output_tensor_for_sync->data().nbytes()), + output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(), + output_tensor_for_sync->device_info().host_format_); } - output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()), - output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(), - output_tensor_for_sync->device_info().host_format_); MS_LOG(INFO) << "Execute DynamicShapeKernel End"; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/host/host_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/host/host_kernel_metadata.cc index 19e5849ccd5..674c02991cd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/host/host_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/host/host_kernel_metadata.cc @@ -24,20 +24,15 @@ namespace mindspore { namespace kernel { -static const std::set host_kernel = { - prim::kPrimDynamicShape->name(), prim::kPrimDynamicBroadcastGradientArgs->name(), prim::kPrimDynamicReshape->name()}; - void HostMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { MS_LOG(INFO) << "HostMetadataInfo."; MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(kernel_info_list); - std::string op_name = AnfAlgo::GetCNodeName(kernel_node); - if (host_kernel.find(op_name) == host_kernel.end()) { - MS_LOG(DEBUG) << "Host dose not have op [" << op_name << "]"; + if (!AnfAlgo::IsHostKernel(kernel_node)) { + MS_LOG(DEBUG) << "Host does not have op [" << kernel_node->DebugString() << "]"; return; } - std::vector inputs_format{}; std::vector inputs_type{}; size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 3d351620545..1e9aa927285 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -2233,6 +2233,39 @@ void 
AnfRuntimeAlgorithm::GetAllFatherRealNode(const AnfNodePtr &anf_node, std:: } } +bool AnfRuntimeAlgorithm::IsHostKernel(const CNodePtr &kernel_node) { + const std::set host_kernel = {prim::kPrimDynamicShape->name(), prim::kPrimDynamicReshape->name(), + prim::kPrimDynamicBroadcastGradientArgs->name()}; + auto op_name = AnfAlgo::GetCNodeName(kernel_node); + if (host_kernel.find(op_name) == host_kernel.end()) { + return false; + } + return true; +} + +namespace { +// Host kernel with inputs on host +bool SkipDataSync(const CNodePtr &node, const std::map &depend_tensors) { + if (!AnfAlgo::IsHostKernel(node)) { + return false; + } + auto input_size = AnfAlgo::GetInputTensorNum(node); + for (size_t i = 0; i < input_size; ++i) { + auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i); + auto real_input = input_with_index.first; + auto iter_tensor = depend_tensors.find(i); + if (iter_tensor != depend_tensors.end()) { + auto output_addr = AnfAlgo::GetOutputAddr(real_input, 0); + MS_EXCEPTION_IF_NULL(output_addr); + if (output_addr->DeviceType() != device::DeviceAddressType::kCPU) { + return false; + } + } + } + return true; +} +} // namespace + void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map *depend_tensors) { MS_EXCEPTION_IF_NULL(node); MS_LOG(INFO) << "InferShape start, node:" << node->DebugString(); @@ -2255,8 +2288,10 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::mapend()) { auto tensor_ptr = iter_tensor->second; MS_EXCEPTION_IF_NULL(tensor_ptr); - // sync data from device to host - tensor_ptr->data_sync(); + if (!SkipDataSync(node, *depend_tensors)) { + // sync data from device to host + tensor_ptr->data_sync(); + } auto real_abs = real_input->abstract(); if (real_abs->isa()) { real_input->abstract()->set_value(tensor_ptr); diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h index a44ab69beb4..632c261eaa0 100644 --- 
a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h @@ -297,6 +297,7 @@ class AnfRuntimeAlgorithm { static std::vector GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index); static std::vector GetOutputMinShape(const AnfNodePtr &anf_node, size_t index); static bool IsNodeDynamicShape(const AnfNodePtr &node); + static bool IsHostKernel(const CNodePtr &node); static void InferShape(const CNodePtr &node, std::map *depend_tensors = nullptr); static void AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input, const AnfNodePtr &real_input, size_t index); diff --git a/tests/st/ops/ascend/test_shape.py b/tests/st/ops/ascend/test_shape.py new file mode 100644 index 00000000000..a597721d1c7 --- /dev/null +++ b/tests/st/ops/ascend/test_shape.py @@ -0,0 +1,58 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import pytest +import mindspore.context as context +import mindspore.nn as nn +import mindspore.dataset as ds +from mindspore.ops import operations as P +from mindspore import Model + +context.set_context(mode=context.GRAPH_MODE, + device_target="Ascend") + +def dataset_generator(): + for i in range(1, 10): + yield(np.ones((32, 2*i), dtype=np.float32), np.ones((32, 2*i), dtype=np.float32)) + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.unique = P.Unique() + self.shape = P.DynamicShape() + self.reshape = P.Reshape() + self.add = P.Add() + + def construct(self, x, y): + val = self.add(x, y) + size = self.shape(val) + res = self.reshape(val, size) + return res + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_shape(): + """ + Feature: dynamic shape + Description: dynamic shape input data set + Expectation: success + """ + network = Net() + dataset = ds.GeneratorDataset(dataset_generator, ["data1", "data2"]) + dataset.set_dynamic_columns(columns={"data1": [32, None], "data2": [32, None]}) + model = Model(network) + model.train(1, dataset, sink_size=1)