From 23e366e712850be41407ebbf3419255da4b4b357 Mon Sep 17 00:00:00 2001
From: nomindcarry
Date: Mon, 19 Feb 2024 22:24:49 -0800
Subject: [PATCH] Support non-contiguous input for Identity

---
 .../kernel/pyboost/customize/identity.cc      | 100 ++++++++++++++++
 .../ccsrc/kernel/pyboost/customize/identity.h |  34 ++++++
 .../op_function/template/pyboost_function.tpl |   1 +
 .../ccsrc/pipeline/pynative/pynative_utils.cc |  12 ++
 .../ccsrc/pipeline/pynative/pynative_utils.h  |   1 +
 .../kernel/pyboost/customize/identity.cc      | 111 ++++++++++++++----
 .../cpu/kernel/pyboost/customize/identity.cc  |  33 ++++++
 .../cpu/kernel/pyboost/customize/identity.h   |  33 ++++++
 .../gpu/kernel/pyboost/customize/identity.cc  |  39 ++++++
 .../gpu/kernel/pyboost/customize/identity.h   |  33 ++++++
 .../runtime/device/device_address_utils.cc    |  18 +++
 .../runtime/device/device_address_utils.h     |   2 +
 mindspore/core/ops/ops_def/identity_op.yaml   |   6 +-
 tests/st/numpy_native/test_array_creations.py |   2 +-
 14 files changed, 396 insertions(+), 29 deletions(-)
 create mode 100644 mindspore/ccsrc/kernel/pyboost/customize/identity.cc
 create mode 100644 mindspore/ccsrc/kernel/pyboost/customize/identity.h
 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.cc
 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.h
 create mode 100644 mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc
 create mode 100644 mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.h

diff --git a/mindspore/ccsrc/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/kernel/pyboost/customize/identity.cc
new file mode 100644
index 00000000000..a2e0793fda2
--- /dev/null
+++ b/mindspore/ccsrc/kernel/pyboost/customize/identity.cc
@@ -0,0 +1,100 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
+#include <memory>
+#include
+
+namespace mindspore {
+namespace kernel {
+namespace pyboost {
+
+void IdentityCustomizeCallWithoutContiguous(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
+                                            void *stream) {
+  // Async
+  PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor, stream]() {
+    MS_LOG(DEBUG) << "Run device task Identity start";
+    auto device_context = op->device_context();
+    const auto &outputs = op->outputs();
+    auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
+
+    // Malloc for input tensors
+    PyBoostUtils::MallocOpInputs(device_context, x_tensor);
+
+    // Malloc for output tensors
+    auto launch_device_address = runtime::DeviceAddressUtils::CreateDeviceAddress(
+      op->device_context(), outputs[0], x_tensor->storage_info()->ori_shape, op->stream_id());
+    if (!device_context->device_res_manager_->AllocateMemory(launch_device_address.get())) {
+      MS_LOG(EXCEPTION) << "Allocate memory failed";
+    }
+
+    // Get input kernel tensors; non-tensor values are malloced here
+    const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->input_abs(), x_tensor);
+
+    // Get output kernel tensors
+    std::vector<KernelTensor *> output_kernel_tensor_list{launch_device_address->kernel_tensor().get()};
+    device::DeviceAddressPtrList output_device_address_list{launch_device_address};
+    const auto &output_address_info = std::make_pair(output_kernel_tensor_list, output_device_address_list);
+
+    PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream);
+    auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
+    output_address->SetStorageInfo(input_x_address->GetStorageInfo());
+    output_address->set_ptr(launch_device_address->GetMutablePtr());
+    MS_LOG(DEBUG) << "Run device task Identity end";
+  }));
+}
+
+void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor, void *stream) {
+  // Async
+  PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor, stream]() {
+    MS_LOG(DEBUG) << "Run device task Identity start";
+    auto device_context = op->device_context();
+    const auto &outputs = op->outputs();
+
+    // Malloc for input tensors
+    PyBoostUtils::MallocOpInputs(device_context, x_tensor);
+    // Malloc for output tensors
+    PyBoostUtils::MallocOpOutputs(device_context, outputs);
+
+    // Get input kernel tensors; non-tensor values are malloced here
+    const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->input_abs(), x_tensor);
+
+    // Get output kernel tensors
+    const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, {op->output_abs()}, outputs);
+
+    PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream);
+    MS_LOG(DEBUG) << "Run device task Identity end";
+  }));
+}
+
+tensor::TensorPtr IdentityCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor, void *stream) {
+  OpRunner::InferOpOutput(op, x_tensor);
+
+  PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
+  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());
+
+  if (x_tensor->is_contiguous()) {
+    MS_LOG(DEBUG) << "Run Identity input contiguous";
+    IdentityCustomizeCall(op, x_tensor, stream);
+  } else {
+    MS_LOG(DEBUG) << "Run Identity input without contiguous";
+    IdentityCustomizeCallWithoutContiguous(op, x_tensor, stream);
+  }
+  return op->output(0);
+}
+}  // namespace pyboost
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/kernel/pyboost/customize/identity.h b/mindspore/ccsrc/kernel/pyboost/customize/identity.h
new file mode 100644
index 00000000000..b9705d35a20
--- /dev/null
+++ b/mindspore/ccsrc/kernel/pyboost/customize/identity.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
+#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
+#include <memory>
+#include
+#include "ir/tensor.h"
+#include "ir/value.h"
+#include "runtime/hardware/device_context_manager.h"
+#include "kernel/pyboost/op_runner.h"
+
+namespace mindspore {
+namespace kernel {
+namespace pyboost {
+tensor::TensorPtr BACKEND_EXPORT IdentityCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
+                                                   void *stream = nullptr);
+}  // namespace pyboost
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
diff --git a/mindspore/ccsrc/pipeline/pynative/op_function/template/pyboost_function.tpl b/mindspore/ccsrc/pipeline/pynative/op_function/template/pyboost_function.tpl
index ac5a03ccf00..810373f0f62 100644
--- a/mindspore/ccsrc/pipeline/pynative/op_function/template/pyboost_function.tpl
+++ b/mindspore/ccsrc/pipeline/pynative/op_function/template/pyboost_function.tpl
@@ -34,6 +34,7 @@ py::object ${func_name}_Base(const PrimitivePtr &prim, const py::list &args) {
   // Run op
   (void)op->Call(${cast_args});
   ${optional_to_value}
+  PyNativeAlgo::PyBoost::DataSyncForGraph(op);
   // Update op and op_run_info by op outputs
   PyNativeAlgo::PyBoost::UpdateOpRunInfo(op, {${grad_args}}, op_run_info);
diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc b/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc
index 73cc32fddcb..5ec4fe4fb1c 100644
--- a/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc
+++ b/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc
@@ -1386,6 +1386,18 @@ void PyBoost::UpdateOpRunInfo(const kernel::pyboost::OpPtr &op, const vector
 
+void PyBoost::DataSyncForGraph(const kernel::pyboost::OpPtr &op) {
+  if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
+    // If the execution mode in MsContext is Graph Mode, the tensor will be an input of a graph that executes in
+    // Graph Mode; if that graph contains no CNode after optimization, the tensor needs to be synced to host.
+    for (const auto &output : op->outputs()) {
+      output->data_sync(true);
+    }
+  }
+}
+
 PrimitivePtr PyBoost::ConvertPrimitive(const py::object &obj) {
   const auto &adapter = obj.cast<PrimitivePyAdapterPtr>();
   MS_EXCEPTION_IF_NULL(adapter);
diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_utils.h b/mindspore/ccsrc/pipeline/pynative/pynative_utils.h
index 6f5b449a999..50d3d0a4add 100644
--- a/mindspore/ccsrc/pipeline/pynative/pynative_utils.h
+++ b/mindspore/ccsrc/pipeline/pynative/pynative_utils.h
@@ -211,6 +211,7 @@ struct PyBoost {
     }
     return ret;
   }
+  static void DataSyncForGraph(const kernel::pyboost::OpPtr &op);
 };
 
 // Some common functions used in both jit and PackFunc grad
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc
index 49c2567c686..e63b129e42d 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc
@@ -26,17 +26,76 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
-  OpRunner::InferOpOutput(op, x_tensor);
-
-  PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
-  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());
+void IdentityCustomizeCallWithoutContiguous(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
   // Async
   PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() {
     MS_LOG(DEBUG) << "Run device task Identity start";
     auto device_context = op->device_context();
     const auto &outputs = op->outputs();
+    auto input_shape = x_tensor->storage_info()->ori_shape;
+    const auto &output_shape = x_tensor->storage_info()->ori_shape;
     // Malloc for input tensors
     PyBoostUtils::MallocOpInputs(device_context, x_tensor);
     // Malloc for output tensors
+    auto launch_device_address = runtime::DeviceAddressUtils::CreateDeviceAddress(
+      op->device_context(), outputs[0], x_tensor->storage_info()->ori_shape, op->stream_id());
+    if (!device_context->device_res_manager_->AllocateMemory(launch_device_address.get())) {
+      MS_LOG(EXCEPTION) << "Allocate memory failed";
+    }
+
+    auto identity_kernel = std::make_shared<AclKernelMod>();
+    auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
+
+    if (!input_x_address->kernel_tensor()->host_info_exist()) {
+      input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()),
+                                                    std::make_shared<TensorType>(x_tensor->Dtype()), nullptr);
+    }
+    if (!launch_device_address->kernel_tensor()->host_info_exist()) {
+      launch_device_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(output_shape),
+                                                          std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr);
+    }
+    auto input_kernel_tensors = {input_x_address->kernel_tensor().get()};
+    auto output_kernel_tensors = {launch_device_address->kernel_tensor().get()};
+
+    if (!std::static_pointer_cast<KernelMod>(identity_kernel)
+           ->Init(prim::kPrimIdentity, input_kernel_tensors, output_kernel_tensors)) {
+      MS_LOG(EXCEPTION) << "#dmsg#Kernel build failed:#dmsg#Initialize acl kernel op[Identity] failed.";
+    }
+    identity_kernel->CreateAclConverter();
+    identity_kernel->SetDeviceInfo({input_x_address->format()}, {launch_device_address->format()},
+                                   {input_x_address->type_id()}, {launch_device_address->type_id()});
+
+    identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape);
+    identity_kernel->PackageOutput(kIndex0, output_shape);
+    identity_kernel->SetNeedConvertHostTensor(true);
+
+    if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) {
+      MS_LOG(EXCEPTION) << "Kernel identity resize failed";
+    }
+    auto stream_ptr = device_context->device_res_manager_->GetStream(op->stream_id());
+
+    auto workspace_address = PyBoostUtils::CreateWorkSpaceDeviceAddress(identity_kernel, device_context, "Identity");
+    auto workspaces = PyBoostUtils::GetKernelTensorFromAddress(workspace_address);
+
+    if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) {
+      MS_LOG(EXCEPTION) << "Launch kernel identity failed";
+    }
+    auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
+    output_address->SetStorageInfo(input_x_address->GetStorageInfo());
+    output_address->set_ptr(launch_device_address->GetMutablePtr());
+    MS_LOG(DEBUG) << "Run device task Identity end";
+  }));
+}
+
+void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
+  // Async
+  PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() {
+    MS_LOG(DEBUG) << "Run device task Identity start";
+    auto device_context = op->device_context();
+    const auto &outputs = op->outputs();
+    auto input_shape = x_tensor->shape();
+    auto output_shape = outputs[0]->shape();
     // Malloc for input tensors
     PyBoostUtils::MallocOpInputs(device_context, x_tensor);
     // Malloc for output tensors
@@ -45,12 +104,13 @@ tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, c
     auto identity_kernel = std::make_shared<AclKernelMod>();
     auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
     auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
+
     if (!input_x_address->kernel_tensor()->host_info_exist()) {
       input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()),
                                                     std::make_shared<TensorType>(x_tensor->Dtype()), nullptr);
     }
     if (!output_address->kernel_tensor()->host_info_exist()) {
-      output_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(outputs[0]->shape()),
+      output_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(output_shape),
                                                    std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr);
     }
     auto input_kernel_tensors = {input_x_address->kernel_tensor().get()};
@@ -63,40 +123,39 @@ tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, c
     identity_kernel->CreateAclConverter();
     identity_kernel->SetDeviceInfo({input_x_address->format()}, {output_address->format()},
                                    {input_x_address->type_id()}, {output_address->type_id()});
-    auto input_shape = x_tensor->shape();
     identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape);
-    identity_kernel->PackageOutput(kIndex0, outputs[0]->shape());
+    identity_kernel->PackageOutput(kIndex0, output_shape);
     identity_kernel->SetNeedConvertHostTensor(true);
 
     if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) {
       MS_LOG(EXCEPTION) << "Kernel identity resize failed";
     }
-    auto stream_ptr = device_context->device_res_manager_->GetStream(kDefaultStreamIndex);
+    auto stream_ptr = device_context->device_res_manager_->GetStream(op->stream_id());
 
-    auto workspace_sizes = identity_kernel->GetWorkspaceSizeList();
-    std::vector<KernelTensor *> workspaces;
-    workspaces.reserve(workspace_sizes.size());
-    for (size_t i = 0; i < workspace_sizes.size(); ++i) {
-      auto kernel_tensor = std::make_shared<KernelTensor>(
-        nullptr, workspace_sizes[i], Format::DEFAULT_FORMAT, kTypeUnknown, ShapeVector(),
-        device_context->device_context_key().device_name_,
-        device_context->device_context_key().device_id_);
-      auto device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
-      MS_EXCEPTION_IF_NULL(device_address);
-      if (device_address->GetPtr() == nullptr &&
-          !device_context->device_res_manager_->AllocateMemory(device_address.get())) {
-        MS_LOG(EXCEPTION) << "Allocate dynamic workspace memory failed";
-      }
-      (void)workspaces.emplace_back(device_address->kernel_tensor().get());
-      MS_LOG(DEBUG) << "workspace[" << i << "]:" << workspaces.back()->device_ptr()
-                    << " size:" << workspaces.back()->size();
-    }
+    auto workspace_address = PyBoostUtils::CreateWorkSpaceDeviceAddress(identity_kernel, device_context, "Identity");
+    auto workspaces = PyBoostUtils::GetKernelTensorFromAddress(workspace_address);
 
     if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) {
       MS_LOG(EXCEPTION) << "Launch kernel identity failed";
     }
     MS_LOG(DEBUG) << "Run device task Identity end";
   }));
+}
+
+tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
+  OpRunner::InferOpOutput(op, x_tensor);
+
+  PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
+  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());
+
+  if (x_tensor->is_contiguous()) {
+    MS_LOG(DEBUG) << "Run Identity input contiguous";
+    IdentityCustomizeCall(op, x_tensor);
+  } else {
+    MS_LOG(DEBUG) << "Run Identity input without contiguous";
+    IdentityCustomizeCallWithoutContiguous(op, x_tensor);
+  }
   return op->output(0);
 }
 }  // namespace pyboost
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.cc
new file mode 100644
index 00000000000..476b3b8602c
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.cc
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "plugin/device/cpu/kernel/pyboost/customize/identity.h"
+#include <memory>
+#include
+#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
+
+namespace mindspore {
+namespace kernel {
+namespace pyboost {
+tensor::TensorPtr IdentityCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
+  MS_LOG(DEBUG) << "Identity call start";
+  IdentityCustomize(op, x_tensor);
+  MS_LOG(DEBUG) << "Identity call end";
+  return op->output(0);
+}
+}  // namespace pyboost
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.h b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.h
new file mode 100644
index 00000000000..24dec4a8ee0
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/identity.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
+#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
+#include <memory>
+#include
+#include "ir/tensor.h"
+#include "ir/value.h"
+#include "runtime/hardware/device_context_manager.h"
+#include "kernel/pyboost/op_runner.h"
+
+namespace mindspore {
+namespace kernel {
+namespace pyboost {
+tensor::TensorPtr IdentityCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
+}  // namespace pyboost
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc
new file mode 100644
index 00000000000..431d248c0c1
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "plugin/device/gpu/kernel/pyboost/customize/identity.h"
+#include <memory>
+#include
+#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
+#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
+
+namespace mindspore {
+namespace kernel {
+namespace pyboost {
+tensor::TensorPtr IdentityGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
+  MS_LOG(DEBUG) << "Identity call start";
+  auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id());
+  IdentityCustomize(op, x_tensor, stream);
+  static auto sync = MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE);
+  if (sync && !op->device_context()->device_res_manager_->SyncAllStreams()) {
+    MS_LOG(EXCEPTION) << "SyncStream failed for op Identity.";
+  }
+  MS_LOG(DEBUG) << "Identity call end";
+  return op->output(0);
+}
+}  // namespace pyboost
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.h b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.h
new file mode 100644
index 00000000000..5eb8527a3e3
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
+#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
+#include <memory>
+#include
+#include "ir/tensor.h"
+#include "ir/value.h"
+#include "runtime/hardware/device_context_manager.h"
+#include "kernel/pyboost/op_runner.h"
+
+namespace mindspore {
+namespace kernel {
+namespace pyboost {
+tensor::TensorPtr IdentityGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
+}  // namespace pyboost
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
diff --git a/mindspore/ccsrc/runtime/device/device_address_utils.cc b/mindspore/ccsrc/runtime/device/device_address_utils.cc
index 8c7855cd81b..71496cd794f 100644
--- a/mindspore/ccsrc/runtime/device/device_address_utils.cc
+++ b/mindspore/ccsrc/runtime/device/device_address_utils.cc
@@ -1077,6 +1077,24 @@ void DeviceAddressUtils::CreateOutputTensorAddress(DeviceContext *device_context
   }
 }
 
+device::DeviceAddressPtr DeviceAddressUtils::CreateDeviceAddress(DeviceContext *device_context,
+                                                                 const tensor::TensorPtr &tensor,
+                                                                 const ShapeVector &real_shape,
+                                                                 const size_t &stream_id) {
+  MS_EXCEPTION_IF_NULL(device_context);
+  MS_EXCEPTION_IF_NULL(tensor);
+  auto tensor_size = GetTypeByte(TypeIdToType(tensor->data_type())) * SizeOf(real_shape);
+  const auto &device_format = GetFormatByTensorShape(device_context, tensor->shape());
+  auto kernel_tensor = std::make_shared<kernel::KernelTensor>(
+    nullptr, tensor_size, device_format, tensor->data_type(), real_shape,
+    device_context->device_context_key().device_name_, device_context->device_context_key().device_id_);
+  kernel_tensor->set_stream_id(stream_id);
+  device::DeviceAddressPtr device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
+  MS_LOG(DEBUG) << "Create tensor device address " << device_address << ", Shape: " << tensor->shape()
+                << ", Type: " << tensor->data_type();
+  return device_address;
+}
+
 void DeviceAddressUtils::MallocForOutputs(DeviceContext *device_context,
                                           const std::vector<tensor::TensorPtr> &outputs) {
   for (const auto &output : outputs) {
diff --git a/mindspore/ccsrc/runtime/device/device_address_utils.h b/mindspore/ccsrc/runtime/device/device_address_utils.h
index c046eb7531d..9bdff1c2f0d 100644
--- a/mindspore/ccsrc/runtime/device/device_address_utils.h
+++ b/mindspore/ccsrc/runtime/device/device_address_utils.h
@@ -117,6 +117,8 @@ class BACKEND_EXPORT DeviceAddressUtils {
   static void UpdateDeviceAddressHostInfoByNode(const device::DeviceAddressPtr &addr, const AnfNodePtr &node,
                                                 size_t output_idx);
+  static device::DeviceAddressPtr CreateDeviceAddress(DeviceContext *device_context, const tensor::TensorPtr &tensor,
+                                                      const ShapeVector &real_shape, const size_t &stream_id);
 };
 }  // namespace runtime
 }  // namespace mindspore
diff --git a/mindspore/core/ops/ops_def/identity_op.yaml b/mindspore/core/ops/ops_def/identity_op.yaml
index 1fe28bd7d29..3694eb82af8 100644
--- a/mindspore/core/ops/ops_def/identity_op.yaml
+++ b/mindspore/core/ops/ops_def/identity_op.yaml
@@ -9,5 +9,7 @@ identity:
   function:
     name: deepcopy
   dispatch:
-    enable: False
-    Ascend: IdentityAscend
\ No newline at end of file
+    enable: True
+    Ascend: IdentityAscend
+    CPU: IdentityCPU
+    GPU: IdentityGPU
\ No newline at end of file
diff --git a/tests/st/numpy_native/test_array_creations.py b/tests/st/numpy_native/test_array_creations.py
index c001c27a174..1363b7d6cfb 100644
--- a/tests/st/numpy_native/test_array_creations.py
+++ b/tests/st/numpy_native/test_array_creations.py
@@ -1111,7 +1111,7 @@ def test_empty_like_exception():
         _pynative_executor.sync()
 
 
-@pytest.mark.level1
+@pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
 @pytest.mark.platform_x86_gpu_training
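
Editor's note (not part of the patch): a minimal sketch of what this change enables from the Python side. It assumes `ops.deepcopy` is the functional entry point generated from `identity_op.yaml` (`function: name: deepcopy`) and that `transpose()` returns a non-contiguous view in PyNative mode; both are assumptions about the surrounding framework, not something this patch states.

```python
# Hedged usage sketch, not part of the patch. Assumes ops.deepcopy is the
# functional API generated from identity_op.yaml and that transpose() yields
# a non-contiguous view under PyNative mode.
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

ms.set_context(mode=ms.PYNATIVE_MODE)

x = Tensor(np.arange(6).reshape(2, 3).astype(np.float32))
x_t = x.transpose()        # potentially a non-contiguous view of x
y = ops.deepcopy(x_t)      # dispatches to the pyboost Identity kernel
assert (y.asnumpy() == x_t.asnumpy()).all()
```

On the non-contiguous path, the patch launches the kernel against the input's original (`ori_shape`) layout and then attaches the input's storage info and the freshly allocated pointer to the output device address, so the copy preserves the view semantics instead of requiring a contiguous input.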