Identity: support non-contiguous input tensors

This commit is contained in:
nomindcarry 2024-02-19 22:24:49 -08:00
parent 3f818e10e8
commit 23e366e712
14 changed files with 396 additions and 29 deletions

View File

@ -0,0 +1,100 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
#include <memory>
#include <utility>
namespace mindspore {
namespace kernel {
namespace pyboost {
// Dispatches an asynchronous device task that runs the Identity kernel on a non-contiguous input.
//
// The kernel writes into a temporary device address created from the input's original storage shape
// (storage_info()->ori_shape). After the launch, the op's real output address takes over the input's
// storage info and the device pointer of that temporary address.
//
//   op       - op runner providing device context, primitive, input abstract, stream id and outputs.
//   x_tensor - input tensor; its storage info and device address are read inside the task.
//   stream   - stream handle forwarded unchanged to PyBoostUtils::LaunchKernel.
//
// Raises through MS_LOG(EXCEPTION) inside the task when the output list is empty, when the storage info
// or a device address is missing, or when device memory allocation fails.
void IdentityCustomizeCallWithoutContigous(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
                                           void *stream) {
  // Async
  PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor, stream]() {
    MS_LOG(DEBUG) << "Run device task Identity start";
    auto device_context = op->device_context();
    const auto &outputs = op->outputs();
    if (outputs.empty() || outputs[0] == nullptr) {
      MS_LOG(EXCEPTION) << "Identity has no valid output tensor";
    }
    // The original storage shape is required to size the launch buffer, so fail loudly if it is absent.
    const auto &storage_info = x_tensor->storage_info();
    if (storage_info == nullptr) {
      MS_LOG(EXCEPTION) << "Identity input tensor has no storage info";
    }
    // dynamic_pointer_cast yields an empty pointer on a missing or mismatching address.
    auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
    if (input_x_address == nullptr) {
      MS_LOG(EXCEPTION) << "Identity input tensor has no device address";
    }
    // Malloc for input tensors
    PyBoostUtils::MallocOpInputs(device_context, x_tensor);
    // Malloc for output tensors: a separate address covering the whole original storage shape.
    auto launch_device_address = runtime::DeviceAddressUtils::CreateDeviceAddress(
      device_context, outputs[0], storage_info->ori_shape, op->stream_id());
    if (launch_device_address == nullptr) {
      MS_LOG(EXCEPTION) << "Create device address for Identity output failed";
    }
    if (!device_context->device_res_manager_->AllocateMemory(launch_device_address.get())) {
      MS_LOG(EXCEPTION) << "Allocate memory failed";
    }
    // Get inputs kernel tensors, the not-tensor value will malloc here
    const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->input_abs(), x_tensor);
    // Get outputs kernel tensors
    std::vector<kernel::KernelTensor *> output_kernel_tensor_list{launch_device_address->kernel_tensor().get()};
    device::DeviceAddressPtrList output_device_address_list{launch_device_address};
    const auto &output_address_info = std::make_pair(output_kernel_tensor_list, output_device_address_list);
    PyBoostUtils::LaunchKernel(op->primitive(), device_context, input_address_info, output_address_info, stream);
    // Hand the launch result over to the op's real output address.
    auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
    if (output_address == nullptr) {
      MS_LOG(EXCEPTION) << "Identity output tensor has no device address";
    }
    output_address->SetStorageInfo(input_x_address->GetStorageInfo());
    output_address->set_ptr(launch_device_address->GetMutablePtr());
    MS_LOG(DEBUG) << "Run device task Identity end";
  }));
}
void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor, void *stream) {
// Async
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor, stream]() {
MS_LOG(DEBUG) << "Run device task Identity start";
auto device_context = op->device_context();
const auto &outputs = op->outputs();
// Malloc for input tensors
PyBoostUtils::MallocOpInputs(device_context, x_tensor);
// Malloc for output tensors
PyBoostUtils::MallocOpOutputs(device_context, outputs);
// Get inputs kernel tensors, the not-tensor value will malloc here
const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->input_abs(), x_tensor);
// Get outputs kernel tensors
const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, {op->output_abs()}, outputs);
PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream);
MS_LOG(DEBUG) << "Run device task Identity end";
}));
}
// Shared Identity entry point: infers the op output, prepares input and output tensors on the op's
// device context, then dispatches either the contiguous or the non-contiguous device task.
// Returns the op's first output tensor.
tensor::TensorPtr IdentityCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor, void *stream) {
  OpRunner::InferOpOutput(op, x_tensor);
  PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());

  // Non-contiguous inputs take the dedicated path; everything else uses the plain launch.
  if (!x_tensor->is_contiguous()) {
    MS_LOG(DEBUG) << "Run Identity input without contiguous";
    IdentityCustomizeCallWithoutContigous(op, x_tensor, stream);
    return op->output(0);
  }

  MS_LOG(DEBUG) << "Run Identity input contiguous";
  IdentityCustomizeCall(op, x_tensor, stream);
  return op->output(0);
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,34 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
// Device-independent Identity implementation shared by the backend-specific customize functions.
// op: op runner used for output inference, device context, outputs and kernel launch.
// x_tensor: input tensor; may be contiguous or non-contiguous.
// stream: stream handle passed through to the kernel launch; defaults to nullptr when the caller has none.
// Returns the first output tensor of `op`.
tensor::TensorPtr BACKEND_EXPORT IdentityCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
void *stream = nullptr);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_

View File

@ -34,6 +34,7 @@ py::object ${func_name}_Base(const PrimitivePtr &prim, const py::list &args) {
// Run op // Run op
(void)op->Call(${cast_args}); (void)op->Call(${cast_args});
${optional_to_value} ${optional_to_value}
PyNativeAlgo::PyBoost::DataSyncForGraph(op);
// Update op and op_run_info by op outputs // Update op and op_run_info by op outputs
PyNativeAlgo::PyBoost::UpdateOpRunInfo(op, {${grad_args}}, op_run_info); PyNativeAlgo::PyBoost::UpdateOpRunInfo(op, {${grad_args}}, op_run_info);

View File

@ -1386,6 +1386,18 @@ void PyBoost::UpdateOpRunInfo(const kernel::pyboost::OpPtr &op, const vector<Val
} }
} }
// Calls data_sync(true) on every output tensor of `op` unless the context execution mode is PyNative.
void PyBoost::DataSyncForGraph(const kernel::pyboost::OpPtr &op) {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  const bool is_pynative = context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode;
  if (is_pynative) {
    return;
  }
  // If execution mode is Graph Mode in MsContext, the tensor will be the input of a graph that executes in
  // Graph Mode. If that graph contains no CNode after optimization, the tensor needs to be synced to host.
  for (const auto &out_tensor : op->outputs()) {
    out_tensor->data_sync(true);
  }
}
PrimitivePtr PyBoost::ConvertPrimitive(const py::object &obj) { PrimitivePtr PyBoost::ConvertPrimitive(const py::object &obj) {
const auto &adapter = obj.cast<PrimitivePyAdapterPtr>(); const auto &adapter = obj.cast<PrimitivePyAdapterPtr>();
MS_EXCEPTION_IF_NULL(adapter); MS_EXCEPTION_IF_NULL(adapter);

View File

@ -211,6 +211,7 @@ struct PyBoost {
} }
return ret; return ret;
} }
// Syncs every output tensor of `op` (data_sync(true)) when the MsContext execution mode is not PyNative.
static void DataSyncForGraph(const kernel::pyboost::OpPtr &op);
}; };
// Some common functions used in both jit and PackFunc grad // Some common functions used in both jit and PackFunc grad

View File

@ -26,17 +26,76 @@
namespace mindspore { namespace mindspore {
namespace kernel { namespace kernel {
namespace pyboost { namespace pyboost {
tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
OpRunner::InferOpOutput(op, x_tensor);
PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());
// Ascend path for a non-contiguous input: dispatches a device task that builds and launches an ACL Identity
// kernel. The kernel output goes to a temporary device address shaped by the input's original storage shape
// (storage_info()->ori_shape); afterwards the op's output address receives the input's storage info and the
// temporary address's device pointer.
// op: op runner providing device context, outputs and stream id.
// x_tensor: input tensor whose storage info and device address are read inside the task.
// Raises via MS_LOG(EXCEPTION) on allocation, kernel init, resize or launch failure.
void IdentityCustomizeCallWithoutContigous(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
// Async // Async
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() { PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() {
MS_LOG(DEBUG) << "Run device task Identity start"; MS_LOG(DEBUG) << "Run device task Identity start";
auto device_context = op->device_context(); auto device_context = op->device_context();
const auto &outputs = op->outputs(); const auto &outputs = op->outputs();
// Both the packaged input shape and the output shape are the original storage shape of the input.
auto input_shape = x_tensor->storage_info()->ori_shape;
const auto &output_shape = x_tensor->storage_info()->ori_shape;
// Malloc for input tensors
PyBoostUtils::MallocOpInputs(device_context, x_tensor);
// Malloc for output tensors
// A separate launch address is created from ori_shape instead of using the op's own output address.
auto launch_device_address = runtime::DeviceAddressUtils::CreateDeviceAddress(
op->device_context(), outputs[0], x_tensor->storage_info()->ori_shape, op->stream_id());
if (!device_context->device_res_manager_->AllocateMemory(launch_device_address.get())) {
MS_LOG(EXCEPTION) << "Allocate memory failed";
}
auto identity_kernel = std::make_shared<kernel::AclKernelMod>();
auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
// Fill in host-side shape/type info on the kernel tensors only when it has not been set yet.
if (!input_x_address->kernel_tensor()->host_info_exist()) {
input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()),
std::make_shared<TensorType>(x_tensor->Dtype()), nullptr);
}
if (!launch_device_address->kernel_tensor()->host_info_exist()) {
launch_device_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(output_shape),
std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr);
}
auto input_kernel_tensors = {input_x_address->kernel_tensor().get()};
auto output_kernel_tensors = {launch_device_address->kernel_tensor().get()};
// Init is invoked through the KernelMod base type.
if (!std::static_pointer_cast<KernelMod>(identity_kernel)
->Init(prim::kPrimIdentity, input_kernel_tensors, output_kernel_tensors)) {
MS_LOG(EXCEPTION) << "#dmsg#Kernel build failed:#dmsg#Initialize acl kernel op[Identity] failed.";
}
// Configure the ACL kernel: converter, device formats/types, packaged input and output shapes.
identity_kernel->CreateAclConverter();
identity_kernel->SetDeviceInfo({input_x_address->format()}, {launch_device_address->format()},
{input_x_address->type_id()}, {launch_device_address->type_id()});
identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape);
identity_kernel->PackageOutput(kIndex0, output_shape);
identity_kernel->SetNeedConvertHostTensor(true);
if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) {
MS_LOG(EXCEPTION) << "Kernel identity resize failed";
}
// Launch on the stream that belongs to this op, with workspace memory created by PyBoostUtils.
auto stream_ptr = device_context->device_res_manager_->GetStream(op->stream_id());
auto workspace_address = PyBoostUtils::CreateWorkSpaceDeviceAddress(identity_kernel, device_context, "Identity");
auto workspaces = PyBoostUtils::GetKernelTensorFromAddress(workspace_address);
if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) {
MS_LOG(EXCEPTION) << "Launch kernel identity failed";
}
// Point the op's real output address at the launch result and copy over the input's storage info.
auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
output_address->SetStorageInfo(input_x_address->GetStorageInfo());
output_address->set_ptr(launch_device_address->GetMutablePtr());
MS_LOG(DEBUG) << "Run device task Identity end";
}));
}
void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
// Async
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() {
MS_LOG(DEBUG) << "Run device task Identity start";
auto device_context = op->device_context();
const auto &outputs = op->outputs();
auto input_shape = x_tensor->shape();
auto output_shape = outputs[0]->shape();
// Malloc for input tensors // Malloc for input tensors
PyBoostUtils::MallocOpInputs(device_context, x_tensor); PyBoostUtils::MallocOpInputs(device_context, x_tensor);
// Malloc for output tensors // Malloc for output tensors
@ -45,12 +104,13 @@ tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, c
auto identity_kernel = std::make_shared<kernel::AclKernelMod>(); auto identity_kernel = std::make_shared<kernel::AclKernelMod>();
auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address()); auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address()); auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
if (!input_x_address->kernel_tensor()->host_info_exist()) { if (!input_x_address->kernel_tensor()->host_info_exist()) {
input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()), input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()),
std::make_shared<TensorType>(x_tensor->Dtype()), nullptr); std::make_shared<TensorType>(x_tensor->Dtype()), nullptr);
} }
if (!output_address->kernel_tensor()->host_info_exist()) { if (!output_address->kernel_tensor()->host_info_exist()) {
output_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(outputs[0]->shape()), output_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(output_shape),
std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr); std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr);
} }
auto input_kernel_tensors = {input_x_address->kernel_tensor().get()}; auto input_kernel_tensors = {input_x_address->kernel_tensor().get()};
@ -63,40 +123,39 @@ tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, c
identity_kernel->CreateAclConverter(); identity_kernel->CreateAclConverter();
identity_kernel->SetDeviceInfo({input_x_address->format()}, {output_address->format()}, identity_kernel->SetDeviceInfo({input_x_address->format()}, {output_address->format()},
{input_x_address->type_id()}, {output_address->type_id()}); {input_x_address->type_id()}, {output_address->type_id()});
auto input_shape = x_tensor->shape();
identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape); identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape);
identity_kernel->PackageOutput(kIndex0, outputs[0]->shape()); identity_kernel->PackageOutput(kIndex0, output_shape);
identity_kernel->SetNeedConvertHostTensor(true); identity_kernel->SetNeedConvertHostTensor(true);
if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) { if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) {
MS_LOG(EXCEPTION) << "Kernel identity resize failed"; MS_LOG(EXCEPTION) << "Kernel identity resize failed";
} }
auto stream_ptr = device_context->device_res_manager_->GetStream(kDefaultStreamIndex); auto stream_ptr = device_context->device_res_manager_->GetStream(op->stream_id());
auto workspace_sizes = identity_kernel->GetWorkspaceSizeList(); auto workspace_address = PyBoostUtils::CreateWorkSpaceDeviceAddress(identity_kernel, device_context, "Identity");
std::vector<kernel::KernelTensor *> workspaces; auto workspaces = PyBoostUtils::GetKernelTensorFromAddress(workspace_address);
workspaces.reserve(workspace_sizes.size());
for (size_t i = 0; i < workspace_sizes.size(); ++i) {
auto kernel_tensor = std::make_shared<KernelTensor>(
nullptr, workspace_sizes[i], Format::DEFAULT_FORMAT, kTypeUnknown, ShapeVector(),
device_context->device_context_key().device_name_, device_context->device_context_key().device_id_);
auto device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
MS_EXCEPTION_IF_NULL(device_address);
if (device_address->GetPtr() == nullptr &&
!device_context->device_res_manager_->AllocateMemory(device_address.get())) {
MS_LOG(EXCEPTION) << "Allocate dynamic workspace memory failed";
}
(void)workspaces.emplace_back(device_address->kernel_tensor().get());
MS_LOG(DEBUG) << "workspace[" << i << "]:" << workspaces.back()->device_ptr()
<< " size:" << workspaces.back()->size();
}
if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) { if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) {
MS_LOG(EXCEPTION) << "Launch kernel identity failed"; MS_LOG(EXCEPTION) << "Launch kernel identity failed";
} }
MS_LOG(DEBUG) << "Run device task Identity end"; MS_LOG(DEBUG) << "Run device task Identity end";
})); }));
}
// Ascend entry point for Identity: infers the op output, prepares input/output tensors on the op's device
// context, then dispatches the contiguous or the non-contiguous device task depending on
// x_tensor->is_contiguous(). Returns the op's first output tensor.
tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
OpRunner::InferOpOutput(op, x_tensor);
PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());
// Pick the launch path by input memory layout.
if (x_tensor->is_contiguous()) {
MS_LOG(DEBUG) << "Run Identity input contiguous";
IdentityCustomizeCall(op, x_tensor);
} else {
MS_LOG(DEBUG) << "Run Identity input without contiguous";
IdentityCustomizeCallWithoutContigous(op, x_tensor);
}
return op->output(0); return op->output(0);
} }
} // namespace pyboost } // namespace pyboost

View File

@ -0,0 +1,33 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/pyboost/customize/identity.h"
#include <memory>
#include <utility>
#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
// CPU entry point for Identity: delegates to the shared IdentityCustomize without passing a stream
// (its default argument is used) and returns the op's first output tensor.
tensor::TensorPtr IdentityCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
  MS_LOG(DEBUG) << "Identity call start";
  // The returned tensor is intentionally ignored; the result is read back from the op below.
  (void)IdentityCustomize(op, x_tensor);
  MS_LOG(DEBUG) << "Identity call end";
  auto result = op->output(0);
  return result;
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,33 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
// CPU customize function for the Identity op.
// op: op runner that executes the op and holds its outputs.
// x_tensor: input tensor.
// Returns the first output tensor of `op`.
tensor::TensorPtr IdentityCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_

View File

@ -0,0 +1,39 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/pyboost/customize/identity.h"
#include <memory>
#include <utility>
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
// GPU entry point for Identity: looks up the stream for the op's stream id, delegates to the shared
// IdentityCustomize, and syncs all streams afterwards when MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE is set.
// Returns the op's first output tensor.
tensor::TensorPtr IdentityGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
  MS_LOG(DEBUG) << "Identity call start";
  auto gpu_stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id());
  // The returned tensor is intentionally ignored; the result is read back from the op below.
  (void)IdentityCustomize(op, x_tensor, gpu_stream);

  // The flag is a function-local static, so it is read from the context only on the first call.
  static auto need_sync = MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE);
  if (need_sync) {
    const bool synced = op->device_context()->device_res_manager_->SyncAllStreams();
    if (!synced) {
      MS_LOG(EXCEPTION) << "SyncStream failed for op Identity.";
    }
  }

  MS_LOG(DEBUG) << "Identity call end";
  return op->output(0);
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,33 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
// GPU customize function for the Identity op.
// op: op runner that executes the op and holds its outputs and stream id.
// x_tensor: input tensor.
// Returns the first output tensor of `op`.
tensor::TensorPtr IdentityGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_

View File

@ -1077,6 +1077,24 @@ void DeviceAddressUtils::CreateOutputTensorAddress(DeviceContext *device_context
} }
} }
// Creates a device address for `tensor` whose byte size and kernel-tensor shape are taken from `real_shape`
// rather than from the tensor's own shape.
//
//   device_context - device context used to pick the format and to create the address; must not be null.
//   tensor         - tensor supplying the data type (and the shape used for format selection); must not be null.
//   real_shape     - shape used to compute the buffer size and stored in the kernel tensor.
//   stream_id      - stream id recorded on the kernel tensor.
//
// The kernel tensor is built with a null device pointer and this function does not call AllocateMemory, so
// the caller is expected to allocate device memory for the returned address.
// Raises when `device_context`, `tensor` or the created device address is null.
device::DeviceAddressPtr DeviceAddressUtils::CreateDeviceAddress(DeviceContext *device_context,
                                                                 const tensor::TensorPtr &tensor,
                                                                 const ShapeVector &real_shape,
                                                                 const size_t &stream_id) {
  MS_EXCEPTION_IF_NULL(device_context);
  MS_EXCEPTION_IF_NULL(tensor);
  auto tensor_size = GetTypeByte(TypeIdToType(tensor->data_type())) * SizeOf(real_shape);
  // NOTE(review): the format is selected from tensor->shape() while size and shape use real_shape;
  // confirm this is intended when the two shapes differ in rank.
  const auto &device_format = GetFormatByTensorShape(device_context, tensor->shape());
  auto kernel_tensor = std::make_shared<kernel::KernelTensor>(
    nullptr, tensor_size, device_format, tensor->data_type(), real_shape,
    device_context->device_context_key().device_name_, device_context->device_context_key().device_id_);
  kernel_tensor->set_stream_id(stream_id);
  device::DeviceAddressPtr device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
  // Callers dereference the result right away, so reject a failed creation here.
  MS_EXCEPTION_IF_NULL(device_address);
  MS_LOG(DEBUG) << "Create tensor device address " << device_address << ", Shape: " << tensor->shape()
                << ", Real shape: " << real_shape << ", Type: " << tensor->data_type();
  return device_address;
}
void DeviceAddressUtils::MallocForOutputs(DeviceContext *device_context, void DeviceAddressUtils::MallocForOutputs(DeviceContext *device_context,
const std::vector<tensor::TensorPtr> &outputs) { const std::vector<tensor::TensorPtr> &outputs) {
for (const auto &output : outputs) { for (const auto &output : outputs) {

View File

@ -117,6 +117,8 @@ class BACKEND_EXPORT DeviceAddressUtils {
static void UpdateDeviceAddressHostInfoByNode(const device::DeviceAddressPtr &addr, const AnfNodePtr &node, static void UpdateDeviceAddressHostInfoByNode(const device::DeviceAddressPtr &addr, const AnfNodePtr &node,
size_t output_idx); size_t output_idx);
// Creates a device address for `tensor` sized and shaped by `real_shape` and tagged with `stream_id`.
// The implementation does not allocate device memory for the returned address.
static device::DeviceAddressPtr CreateDeviceAddress(DeviceContext *device_context, const tensor::TensorPtr &tensor,
const ShapeVector &real_shape, const size_t &stream_id);
}; };
} // namespace runtime } // namespace runtime
} // namespace mindspore } // namespace mindspore

View File

@ -9,5 +9,7 @@ identity:
function: function:
name: deepcopy name: deepcopy
dispatch: dispatch:
enable: False enable: True
Ascend: IdentityAscend Ascend: IdentityAscend
CPU: IdentityCPU
GPU: IdentityGPU

View File

@ -1111,7 +1111,7 @@ def test_empty_like_exception():
_pynative_executor.sync() _pynative_executor.sync()
@pytest.mark.level1 @pytest.mark.level0
@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training @pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training