identity: support non-contiguous input
This commit is contained in:
parent
3f818e10e8
commit
23e366e712
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Copyright 2024 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace pyboost {
|
||||
|
||||
void IdentityCustomizeCallWithoutContigous(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
|
||||
void *stream) {
|
||||
// Async
|
||||
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor, stream]() {
|
||||
MS_LOG(DEBUG) << "Run device task Identity start";
|
||||
auto device_context = op->device_context();
|
||||
const auto &outputs = op->outputs();
|
||||
auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
|
||||
|
||||
// Malloc for input tensors
|
||||
PyBoostUtils::MallocOpInputs(device_context, x_tensor);
|
||||
|
||||
// Malloc for output tensors
|
||||
auto launch_device_address = runtime::DeviceAddressUtils::CreateDeviceAddress(
|
||||
op->device_context(), outputs[0], x_tensor->storage_info()->ori_shape, op->stream_id());
|
||||
if (!device_context->device_res_manager_->AllocateMemory(launch_device_address.get())) {
|
||||
MS_LOG(EXCEPTION) << "Allocate memory failed";
|
||||
}
|
||||
|
||||
// Get inputs kernel tensors, the not-tensor value will malloc here
|
||||
const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->input_abs(), x_tensor);
|
||||
|
||||
// Get outputs kernel tensors
|
||||
std::vector<kernel::KernelTensor *> output_kernel_tensor_list{launch_device_address->kernel_tensor().get()};
|
||||
device::DeviceAddressPtrList output_device_address_list{launch_device_address};
|
||||
const auto &output_address_info = std::make_pair(output_kernel_tensor_list, output_device_address_list);
|
||||
|
||||
PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream);
|
||||
auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
|
||||
output_address->SetStorageInfo(input_x_address->GetStorageInfo());
|
||||
output_address->set_ptr(launch_device_address->GetMutablePtr());
|
||||
MS_LOG(DEBUG) << "Run device task Identity end";
|
||||
}));
|
||||
}
|
||||
|
||||
void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor, void *stream) {
|
||||
// Async
|
||||
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor, stream]() {
|
||||
MS_LOG(DEBUG) << "Run device task Identity start";
|
||||
auto device_context = op->device_context();
|
||||
const auto &outputs = op->outputs();
|
||||
|
||||
// Malloc for input tensors
|
||||
PyBoostUtils::MallocOpInputs(device_context, x_tensor);
|
||||
// Malloc for output tensors
|
||||
PyBoostUtils::MallocOpOutputs(device_context, outputs);
|
||||
|
||||
// Get inputs kernel tensors, the not-tensor value will malloc here
|
||||
const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->input_abs(), x_tensor);
|
||||
|
||||
// Get outputs kernel tensors
|
||||
const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, {op->output_abs()}, outputs);
|
||||
|
||||
PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream);
|
||||
MS_LOG(DEBUG) << "Run device task Identity end";
|
||||
}));
|
||||
}
|
||||
|
||||
// Common Identity entry point: infer the output, prepare device inputs/outputs,
// then pick the launch path based on input contiguity. Returns op->output(0).
tensor::TensorPtr IdentityCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor, void *stream) {
  OpRunner::InferOpOutput(op, x_tensor);

  PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());

  // Non-contiguous inputs need the storage-info-aware launch path.
  if (!x_tensor->is_contiguous()) {
    MS_LOG(DEBUG) << "Run Identity input without contiguous";
    IdentityCustomizeCallWithoutContigous(op, x_tensor, stream);
  } else {
    MS_LOG(DEBUG) << "Run Identity input contiguous";
    IdentityCustomizeCall(op, x_tensor, stream);
  }
  return op->output(0);
}
|
||||
} // namespace pyboost
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
* Copyright 2024 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"

namespace mindspore {
namespace kernel {
namespace pyboost {
// Common pyboost customize entry for the Identity op: infers the output,
// prepares device inputs/outputs, launches the kernel (with a dedicated path
// for non-contiguous inputs), and returns op->output(0).
// `stream` defaults to nullptr — presumably the backend's default stream is
// used in that case; confirm against the device-specific callers.
tensor::TensorPtr BACKEND_EXPORT IdentityCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
                                                   void *stream = nullptr);
}  // namespace pyboost
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
|
@ -34,6 +34,7 @@ py::object ${func_name}_Base(const PrimitivePtr &prim, const py::list &args) {
|
|||
// Run op
|
||||
(void)op->Call(${cast_args});
|
||||
${optional_to_value}
|
||||
PyNativeAlgo::PyBoost::DataSyncForGraph(op);
|
||||
// Update op and op_run_info by op outputs
|
||||
PyNativeAlgo::PyBoost::UpdateOpRunInfo(op, {${grad_args}}, op_run_info);
|
||||
|
||||
|
|
|
@ -1386,6 +1386,18 @@ void PyBoost::UpdateOpRunInfo(const kernel::pyboost::OpPtr &op, const vector<Val
|
|||
}
|
||||
}
|
||||
|
||||
// Sync op outputs to host when running under graph mode.
void PyBoost::DataSyncForGraph(const kernel::pyboost::OpPtr &op) {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
    return;
  }
  // If execution mode is Graph Mode in MsContext, the tensor will be the input
  // of a graph executed in Graph Mode; if the graph contains no CNode after
  // optimization, the tensor needs to be synced to host.
  for (const auto &out : op->outputs()) {
    out->data_sync(true);
  }
}
|
||||
|
||||
PrimitivePtr PyBoost::ConvertPrimitive(const py::object &obj) {
|
||||
const auto &adapter = obj.cast<PrimitivePyAdapterPtr>();
|
||||
MS_EXCEPTION_IF_NULL(adapter);
|
||||
|
|
|
@ -211,6 +211,7 @@ struct PyBoost {
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
static void DataSyncForGraph(const kernel::pyboost::OpPtr &op);
|
||||
};
|
||||
|
||||
// Some common functions used in both jit and PackFunc grad
|
||||
|
|
|
@ -26,17 +26,76 @@
|
|||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace pyboost {
|
||||
tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
|
||||
OpRunner::InferOpOutput(op, x_tensor);
|
||||
|
||||
PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
|
||||
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());
|
||||
|
||||
// Ascend launch path for a non-contiguous input. Builds an AclKernelMod for
// Identity by hand, launches it over the input's underlying (ori_shape)
// buffer, then re-points the output tensor at that buffer with the input's
// storage info. NOTE(review): "Contigous" is a typo for "Contiguous".
void IdentityCustomizeCallWithoutContigous(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
  // Async
  PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() {
    MS_LOG(DEBUG) << "Run device task Identity start";
    auto device_context = op->device_context();
    const auto &outputs = op->outputs();
    // Both shapes are the input's original (pre-view) shape: the kernel copies
    // the whole underlying storage, not the contiguous view.
    auto input_shape = x_tensor->storage_info()->ori_shape;
    const auto &output_shape = x_tensor->storage_info()->ori_shape;
    // Malloc for input tensors
    PyBoostUtils::MallocOpInputs(device_context, x_tensor);
    // Malloc for output tensors: a dedicated launch buffer sized by ori_shape.
    auto launch_device_address = runtime::DeviceAddressUtils::CreateDeviceAddress(
      op->device_context(), outputs[0], x_tensor->storage_info()->ori_shape, op->stream_id());
    if (!device_context->device_res_manager_->AllocateMemory(launch_device_address.get())) {
      MS_LOG(EXCEPTION) << "Allocate memory failed";
    }

    auto identity_kernel = std::make_shared<kernel::AclKernelMod>();
    auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());

    // Attach host-side shape/type info to the kernel tensors if not present;
    // the ACL converter needs it to build the op description.
    if (!input_x_address->kernel_tensor()->host_info_exist()) {
      input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()),
                                                    std::make_shared<TensorType>(x_tensor->Dtype()), nullptr);
    }
    if (!launch_device_address->kernel_tensor()->host_info_exist()) {
      launch_device_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(output_shape),
                                                          std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr);
    }
    auto input_kernel_tensors = {input_x_address->kernel_tensor().get()};
    auto output_kernel_tensors = {launch_device_address->kernel_tensor().get()};

    // Initialize the ACL kernel; the order below (Init -> CreateAclConverter ->
    // SetDeviceInfo -> Package* -> Resize -> Launch) is required.
    if (!std::static_pointer_cast<KernelMod>(identity_kernel)
           ->Init(prim::kPrimIdentity, input_kernel_tensors, output_kernel_tensors)) {
      MS_LOG(EXCEPTION) << "#dmsg#Kernel build failed:#dmsg#Initialize acl kernel op[Identity] failed.";
    }
    identity_kernel->CreateAclConverter();
    identity_kernel->SetDeviceInfo({input_x_address->format()}, {launch_device_address->format()},
                                   {input_x_address->type_id()}, {launch_device_address->type_id()});

    identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape);
    identity_kernel->PackageOutput(kIndex0, output_shape);
    identity_kernel->SetNeedConvertHostTensor(true);

    if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) {
      MS_LOG(EXCEPTION) << "Kernel identity resize failed";
    }
    auto stream_ptr = device_context->device_res_manager_->GetStream(op->stream_id());

    // Workspace memory is sized by the resized kernel and allocated here.
    auto workspace_address = PyBoostUtils::CreateWorkSpaceDeviceAddress(identity_kernel, device_context, "Identity");
    auto workspaces = PyBoostUtils::GetKernelTensorFromAddress(workspace_address);

    if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) {
      MS_LOG(EXCEPTION) << "Launch kernel identity failed";
    }
    // Re-point the output tensor at the launch buffer, keeping the input's
    // (non-contiguous) storage info so the output is the same view of it.
    auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
    output_address->SetStorageInfo(input_x_address->GetStorageInfo());
    output_address->set_ptr(launch_device_address->GetMutablePtr());
    MS_LOG(DEBUG) << "Run device task Identity end";
  }));
}
|
||||
|
||||
void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
|
||||
// Async
|
||||
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, x_tensor]() {
|
||||
MS_LOG(DEBUG) << "Run device task Identity start";
|
||||
auto device_context = op->device_context();
|
||||
const auto &outputs = op->outputs();
|
||||
auto input_shape = x_tensor->shape();
|
||||
auto output_shape = outputs[0]->shape();
|
||||
// Malloc for input tensors
|
||||
PyBoostUtils::MallocOpInputs(device_context, x_tensor);
|
||||
// Malloc for output tensors
|
||||
|
@ -45,12 +104,13 @@ tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, c
|
|||
auto identity_kernel = std::make_shared<kernel::AclKernelMod>();
|
||||
auto input_x_address = std::dynamic_pointer_cast<device::DeviceAddress>(x_tensor->device_address());
|
||||
auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(outputs[0]->device_address());
|
||||
|
||||
if (!input_x_address->kernel_tensor()->host_info_exist()) {
|
||||
input_x_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(x_tensor->shape()),
|
||||
std::make_shared<TensorType>(x_tensor->Dtype()), nullptr);
|
||||
}
|
||||
if (!output_address->kernel_tensor()->host_info_exist()) {
|
||||
output_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(outputs[0]->shape()),
|
||||
output_address->kernel_tensor()->SetHostInfo(std::make_shared<abstract::TensorShape>(output_shape),
|
||||
std::make_shared<TensorType>(outputs[0]->Dtype()), nullptr);
|
||||
}
|
||||
auto input_kernel_tensors = {input_x_address->kernel_tensor().get()};
|
||||
|
@ -63,40 +123,39 @@ tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, c
|
|||
identity_kernel->CreateAclConverter();
|
||||
identity_kernel->SetDeviceInfo({input_x_address->format()}, {output_address->format()},
|
||||
{input_x_address->type_id()}, {output_address->type_id()});
|
||||
auto input_shape = x_tensor->shape();
|
||||
|
||||
identity_kernel->PackageInput(kIndex0, input_x_address->format(), &input_shape);
|
||||
identity_kernel->PackageOutput(kIndex0, outputs[0]->shape());
|
||||
identity_kernel->PackageOutput(kIndex0, output_shape);
|
||||
identity_kernel->SetNeedConvertHostTensor(true);
|
||||
|
||||
if (identity_kernel->Resize(input_kernel_tensors, output_kernel_tensors) != KRET_OK) {
|
||||
MS_LOG(EXCEPTION) << "Kernel identity resize failed";
|
||||
}
|
||||
auto stream_ptr = device_context->device_res_manager_->GetStream(kDefaultStreamIndex);
|
||||
auto stream_ptr = device_context->device_res_manager_->GetStream(op->stream_id());
|
||||
|
||||
auto workspace_sizes = identity_kernel->GetWorkspaceSizeList();
|
||||
std::vector<kernel::KernelTensor *> workspaces;
|
||||
workspaces.reserve(workspace_sizes.size());
|
||||
for (size_t i = 0; i < workspace_sizes.size(); ++i) {
|
||||
auto kernel_tensor = std::make_shared<KernelTensor>(
|
||||
nullptr, workspace_sizes[i], Format::DEFAULT_FORMAT, kTypeUnknown, ShapeVector(),
|
||||
device_context->device_context_key().device_name_, device_context->device_context_key().device_id_);
|
||||
auto device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
|
||||
MS_EXCEPTION_IF_NULL(device_address);
|
||||
if (device_address->GetPtr() == nullptr &&
|
||||
!device_context->device_res_manager_->AllocateMemory(device_address.get())) {
|
||||
MS_LOG(EXCEPTION) << "Allocate dynamic workspace memory failed";
|
||||
}
|
||||
(void)workspaces.emplace_back(device_address->kernel_tensor().get());
|
||||
MS_LOG(DEBUG) << "workspace[" << i << "]:" << workspaces.back()->device_ptr()
|
||||
<< " size:" << workspaces.back()->size();
|
||||
}
|
||||
auto workspace_address = PyBoostUtils::CreateWorkSpaceDeviceAddress(identity_kernel, device_context, "Identity");
|
||||
auto workspaces = PyBoostUtils::GetKernelTensorFromAddress(workspace_address);
|
||||
|
||||
if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Launch kernel identity failed";
|
||||
}
|
||||
MS_LOG(DEBUG) << "Run device task Identity end";
|
||||
}));
|
||||
}
|
||||
|
||||
// Ascend Identity entry point: infer the output, prepare device inputs and
// outputs, then dispatch by input contiguity. Returns op->output(0).
tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
  OpRunner::InferOpOutput(op, x_tensor);

  PyBoostUtils::PrepareOpInputs(op->device_context(), x_tensor);
  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->outputs());

  // Non-contiguous inputs go through the hand-built ACL kernel path.
  if (!x_tensor->is_contiguous()) {
    MS_LOG(DEBUG) << "Run Identity input without contiguous";
    IdentityCustomizeCallWithoutContigous(op, x_tensor);
  } else {
    MS_LOG(DEBUG) << "Run Identity input contiguous";
    IdentityCustomizeCall(op, x_tensor);
  }
  return op->output(0);
}
|
||||
} // namespace pyboost
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/**
|
||||
* Copyright 2024 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "plugin/device/cpu/kernel/pyboost/customize/identity.h"
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace pyboost {
|
||||
// CPU backend entry for Identity: delegates to the common pyboost
// IdentityCustomize path (default stream) and returns the op's first output.
tensor::TensorPtr IdentityCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
  MS_LOG(DEBUG) << "Identity call start";
  IdentityCustomize(op, x_tensor);
  MS_LOG(DEBUG) << "Identity call end";
  return op->output(0);
}
|
||||
} // namespace pyboost
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,33 @@
|
|||
/**
|
||||
* Copyright 2024 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ir/tensor.h"
|
||||
#include "ir/value.h"
|
||||
#include "runtime/hardware/device_context_manager.h"
|
||||
#include "kernel/pyboost/op_runner.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace pyboost {
|
||||
tensor::TensorPtr IdentityCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
|
||||
} // namespace pyboost
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Copyright 2024 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "plugin/device/gpu/kernel/pyboost/customize/identity.h"
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
|
||||
#include "mindspore/ccsrc/kernel/pyboost/customize/identity.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace pyboost {
|
||||
// GPU backend entry for Identity: resolves the CUDA stream for the op's
// stream id, delegates to the common IdentityCustomize path, and optionally
// syncs all streams for eager-debug mode.
tensor::TensorPtr IdentityGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
  MS_LOG(DEBUG) << "Identity call start";
  auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id());
  IdentityCustomize(op, x_tensor, stream);
  // NOTE(review): `static` caches the synchronize flag at first call — a later
  // runtime change of MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE won't be seen here;
  // confirm this is the intended (perf-motivated) behavior.
  static auto sync = MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE);
  if (sync && !op->device_context()->device_res_manager_->SyncAllStreams()) {
    MS_LOG(EXCEPTION) << "SyncStream failed for op Identity.";
  }
  MS_LOG(DEBUG) << "Identity call end";
  return op->output(0);
}
|
||||
} // namespace pyboost
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,33 @@
|
|||
/**
|
||||
* Copyright 2024 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ir/tensor.h"
|
||||
#include "ir/value.h"
|
||||
#include "runtime/hardware/device_context_manager.h"
|
||||
#include "kernel/pyboost/op_runner.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace pyboost {
|
||||
tensor::TensorPtr IdentityGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
|
||||
} // namespace pyboost
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_IDENTITY_H_
|
|
@ -1077,6 +1077,24 @@ void DeviceAddressUtils::CreateOutputTensorAddress(DeviceContext *device_context
|
|||
}
|
||||
}
|
||||
|
||||
// Creates a device address for `tensor` sized/shaped by `real_shape` (which may
// differ from tensor->shape(), e.g. the ori_shape of a non-contiguous view).
// The backing memory is NOT allocated here; callers allocate via the device
// resource manager. `stream_id` is recorded on the kernel tensor.
device::DeviceAddressPtr DeviceAddressUtils::CreateDeviceAddress(DeviceContext *device_context,
                                                                 const tensor::TensorPtr &tensor,
                                                                 const ShapeVector &real_shape,
                                                                 const size_t &stream_id) {
  MS_EXCEPTION_IF_NULL(device_context);
  MS_EXCEPTION_IF_NULL(tensor);
  // Byte size follows real_shape, not the tensor's own (view) shape.
  auto tensor_size = GetTypeByte(TypeIdToType(tensor->data_type())) * SizeOf(real_shape);
  const auto &device_format = GetFormatByTensorShape(device_context, tensor->shape());
  auto kernel_tensor = std::make_shared<kernel::KernelTensor>(
    nullptr, tensor_size, device_format, tensor->data_type(), real_shape,
    device_context->device_context_key().device_name_, device_context->device_context_key().device_id_);
  kernel_tensor->set_stream_id(stream_id);
  device::DeviceAddressPtr device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
  // Guard against a null result before logging/returning it.
  MS_EXCEPTION_IF_NULL(device_address);
  // Fixed log message: the original ran the address straight into "Shape:".
  MS_LOG(DEBUG) << "Create tensor device address " << device_address << ", Shape: " << tensor->shape()
                << ", Type: " << tensor->data_type();
  return device_address;
}
|
||||
|
||||
void DeviceAddressUtils::MallocForOutputs(DeviceContext *device_context,
|
||||
const std::vector<tensor::TensorPtr> &outputs) {
|
||||
for (const auto &output : outputs) {
|
||||
|
|
|
@ -117,6 +117,8 @@ class BACKEND_EXPORT DeviceAddressUtils {
|
|||
|
||||
static void UpdateDeviceAddressHostInfoByNode(const device::DeviceAddressPtr &addr, const AnfNodePtr &node,
|
||||
size_t output_idx);
|
||||
static device::DeviceAddressPtr CreateDeviceAddress(DeviceContext *device_context, const tensor::TensorPtr &tensor,
|
||||
const ShapeVector &real_shape, const size_t &stream_id);
|
||||
};
|
||||
} // namespace runtime
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -9,5 +9,7 @@ identity:
|
|||
function:
|
||||
name: deepcopy
|
||||
dispatch:
|
||||
enable: False
|
||||
Ascend: IdentityAscend
|
||||
enable: True
|
||||
Ascend: IdentityAscend
|
||||
CPU: IdentityCPU
|
||||
GPU: IdentityGPU
|
|
@ -1111,7 +1111,7 @@ def test_empty_like_exception():
|
|||
_pynative_executor.sync()
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
|
Loading…
Reference in New Issue