forked from mindspore-Ecosystem/mindspore
!6795 rewrite scheduler & fix subGraph segmentation bug
Merge pull request !6795 from hangq/primitive
commit 688c7c104b
@@ -81,7 +81,6 @@ include_directories(${CCSRC_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/runtime/kernel/arm)
include_directories(${TOP_DIR}/third_party)
include_directories(${TOP_DIR}/third_party/flatbuffers/include)
include_directories(${CMAKE_BINARY_DIR})

include(${TOP_DIR}/cmake/utils.cmake)

@@ -27,16 +27,6 @@

namespace mindspore {
namespace session {
/// \brief CallBackParam defined input arguments for callBack function.
struct CallBackParam {
std::string node_name; /**< node name argument */
std::string node_type; /**< node type argument */
};

/// \brief KernelCallBack defined the function pointer for callBack.
using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,
std::vector<tensor::MSTensor *> outputs, const CallBackParam &opInfo)>;

/// \brief LiteSession defined session in MindSpore Lite for compiling Model and forwarding model.
class MS_API LiteSession {
public:

@@ -17,9 +17,11 @@
#ifndef MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_
#define MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_

#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <memory>
#include "ir/dtype/type_id.h"

namespace mindspore {

@@ -74,5 +76,14 @@ class MS_API MSTensor {
virtual void *MutableData() = 0;
};
} // namespace tensor
/// \brief CallBackParam defined input arguments for callBack function.
struct CallBackParam {
std::string node_name; /**< node name argument */
std::string node_type; /**< node type argument */
};

/// \brief KernelCallBack defined the function pointer for callBack.
using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,
std::vector<tensor::MSTensor *> outputs, const CallBackParam &opInfo)>;
} // namespace mindspore
#endif // MINDSPORE_LITE_INCLUDE_MS_TENSOR_H_
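Note: with CallBackParam and KernelCallBack now declared at the top level of namespace mindspore in ms_tensor.h (they previously lived in lite_session.h under namespace session), a per-node hook can be written against the tensor header alone. A minimal sketch, assuming a session object built elsewhere and <iostream> included:

    // Hedged usage sketch: log every node as the executor reaches it.
    mindspore::KernelCallBack before = [](std::vector<mindspore::tensor::MSTensor *> inputs,
                                          std::vector<mindspore::tensor::MSTensor *> outputs,
                                          const mindspore::CallBackParam &info) {
      std::cout << "node " << info.node_name << " (" << info.node_type << ")" << std::endl;
      return true;  // a false return is reported by the runtime as a callback failure
    };
    session->RunGraph(before, nullptr);  // after-callback omitted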
@@ -29,6 +29,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc
${CMAKE_CURRENT_SOURCE_DIR}/kernel_registry.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/populate_parameter.cc
${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc

@@ -14,11 +14,11 @@
 * limitations under the License.
 */

#include <stdlib.h>
#include "src/common/file_utils.h"
#include <fcntl.h>
#include <stdlib.h>
#include <climits>
#include <cmath>
#include "src/common/file_utils.h"
#include "securec/include/securec.h"

namespace mindspore {

@@ -78,7 +78,7 @@ std::string RealPath(const char *path) {
char *real_path = realpath(path, resolvedPath.get());
#endif
if (real_path == nullptr || strlen(real_path) == 0) {
MS_LOG(ERROR) << "Proto file path is not valid";
MS_LOG(ERROR) << "file path is not valid : " << path;
return "";
}
std::string res = resolvedPath.get();

@@ -19,10 +19,7 @@
#include "include/errorcode.h"

namespace mindspore::lite {
int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
const session::KernelCallBack &before, const session::KernelCallBack &after) {
MS_ASSERT(nullptr != allocator);
int Executor::CheckInputs(std::vector<Tensor *> &in_tensors) {
for (auto &inTensor : in_tensors) {
if (inTensor == nullptr) {
MS_LOG(ERROR) << "Graph input tensor is nullptr";
@@ -32,10 +29,18 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_
MS_LOG(ERROR) << "Graph input tensor data is nullptr";
return RET_ERROR;
}
if (inTensor->GetFormat() != schema::Format::Format_NHWC) {
MS_LOG(ERROR) << "Model input tensor should be NHWC";
return RET_ERROR;
}
}
return RET_OK;
}

int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, const KernelCallBack &before,
const KernelCallBack &after) {
MS_ASSERT(nullptr != allocator);
auto ret = this->CheckInputs(in_tensors);
if (RET_OK != ret) {
MS_LOG(ERROR) << "CheckInputs failed";
return ret;
}
kernel::LiteKernelUtil::InitTensorRefCount(kernels);
for (auto out_tensor : out_tensors) { // increase RefCount of output tensors, such that Run will not free them
@@ -44,34 +49,20 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_

for (auto *kernel : kernels) {
MS_ASSERT(nullptr != kernel);

if (before != nullptr) {
if (!before(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()),
{kernel->name(), kernel->type_str()})) {
MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
}
ret = kernel->PreProcess();
if (RET_OK != ret) {
MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name();
return ret;
}

auto ret = kernel->Run();
if (0 != ret) {
ret = kernel->Run(before, after);
if (RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
return ret;
}
if (after != nullptr) {
if (!after(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()),
{kernel->name(), kernel->type_str()})) {
MS_LOG(ERROR) << "run kernel after_callback failed, name: " << kernel->name();
}
}
for (auto input_kernel : kernel->in_kernels()) {
MS_ASSERT(input_kernel != nullptr);
if (input_kernel->is_model_output()) {
continue;
}
ret = input_kernel->DecOutTensorRefCount();
if (0 != ret) {
MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed";
}
ret = kernel->PostProcess();
if (RET_OK != ret) {
MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name();
return ret;
}
}
return RET_OK;
@@ -99,9 +90,9 @@ int Executor::TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_forma
MS_ASSERT(4 == tensor->shape().size());
auto src_format = tensor->GetFormat();
if (src_format == schema::Format::Format_NC4HW4 && dst_format == schema::Format::Format_NHWC) {
auto *src_data = tensor->MutableData();
auto *src_data = tensor->data_c();
if (src_data == nullptr) {
MS_LOG(ERROR) << "MutableData return nullptr";
MS_LOG(ERROR) << "data of tensor is nullptr";
return RET_ERROR;
}
auto *dst_data = allocator->Malloc(tensor->Size());
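Note: the rewritten Executor::Run above fixes a single per-kernel contract — input validation is factored into CheckInputs (non-null tensors, populated data, NHWC layout), and callback handling moves out of the executor into LiteKernel::Run(before, after). The loop reduces to a three-step sequence per kernel; schematically (error handling elided, names taken from this diff):

    // Per-kernel flow after this commit (sketch).
    for (auto *kernel : kernels) {
      kernel->PreProcess();        // late InferShape plus output MallocData
      kernel->Run(before, after);  // the callbacks wrap the concrete Run()
      kernel->PostProcess();       // FreeWorkTensor(): release input ref counts
    }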
@@ -28,13 +28,15 @@ class Executor {
Executor() = default;
virtual ~Executor() = default;

virtual int Prepare(std::vector<kernel::LiteKernel *> &kernels) { return 0; }
virtual int Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return 0; }

virtual int Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
const session::KernelCallBack &before = nullptr, const session::KernelCallBack &after = nullptr);
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);

protected:
int CheckInputs(std::vector<Tensor *> &in_tensors);

int TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr);

int TransformTensorLayoutUint8(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr);

@@ -19,12 +19,21 @@
#include "src/common/log_adapter.h"

namespace mindspore::lite {
InnerContext::InnerContext(const Context *context) {
this->allocator = context->allocator;
this->thread_num_ = context->thread_num_;
this->device_list_.clear();
for (auto &device_ctx : context->device_list_) {
this->device_list_.push_back(device_ctx);
}
}

int InnerContext::Init() {
if (this->device_list_.empty()) {
MS_LOG(ERROR) << "Device list is empty.";
if (RET_OK != this->IsValid()) {
MS_LOG(ERROR) << "Context is not valid";
return RET_NOT_SUPPORT;
}
if (this->thread_pool_ == nullptr && this->device_list_[0].device_type_ == DT_CPU) {
if (this->thread_pool_ == nullptr && this->IsCpuEnabled()) {
this->thread_pool_ =
CreateLiteThreadPool(this->thread_num_, this->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_);
if (this->thread_pool_ == nullptr) {
@@ -49,4 +58,74 @@ InnerContext::~InnerContext() {
this->thread_pool_ = NULL;
}
}

int InnerContext::IsValid() {
if (this->device_list_.empty()) {
MS_LOG(ERROR) << "Device list is empty.";
return RET_NOT_SUPPORT;
}
#ifndef SUPPORT_GPU
if (IsGpuEnabled()) {
MS_LOG(ERROR) << "GPU is not supported.";
return RET_NOT_SUPPORT;
}
#endif
if (IsNpuEnabled()) {
MS_LOG(ERROR) << "NPU is not supported.";
return RET_NOT_SUPPORT;
}
return RET_OK;
}

bool InnerContext::IsCpuFloat16Enabled() {
if (!IsCpuEnabled()) {
return false;
}
return GetCpuInfo().enable_float16_;
}

bool InnerContext::IsGpuFloat16Enabled() {
if (!IsGpuEnabled()) {
return false;
}
return GetGpuInfo().enable_float16_;
}

bool InnerContext::IsCpuEnabled() {
return this->device_list_.end() !=
std::find_if(this->device_list_.begin(), this->device_list_.end(),
[](const DeviceContext &device) { return device.device_type_ == DT_CPU; });
}

bool InnerContext::IsGpuEnabled() {
return this->device_list_.end() !=
std::find_if(this->device_list_.begin(), this->device_list_.end(),
[](const DeviceContext &device) { return device.device_type_ == DT_GPU; });
}

bool InnerContext::IsNpuEnabled() {
return this->device_list_.end() !=
std::find_if(this->device_list_.begin(), this->device_list_.end(),
[](const DeviceContext &device) { return device.device_type_ == DT_NPU; });
}

CpuDeviceInfo InnerContext::GetCpuInfo() {
auto iter = std::find_if(this->device_list_.begin(), this->device_list_.end(),
[](const DeviceContext &device) { return device.device_type_ == DT_CPU; });
if (iter == this->device_list_.end()) {
return {};
} else {
return iter->device_info_.cpu_device_info_;
}
}

GpuDeviceInfo InnerContext::GetGpuInfo() {
auto iter = std::find_if(this->device_list_.begin(), this->device_list_.end(),
[](const DeviceContext &device) { return device.device_type_ == DT_GPU; });
if (iter == this->device_list_.end()) {
return {};
} else {
return iter->device_info_.gpu_device_info_;
}
}
} // namespace mindspore::lite
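Note: inner_context.cc now owns the device-capability checks that lite_session.cc previously open-coded (see the LiteSession::Init simplification below). A hedged sketch of the new surface from a caller's point of view; the user_context pointer is illustrative:

    lite::InnerContext inner(user_context);  // copies allocator, thread count, device list
    if (inner.Init() != lite::RET_OK) {
      // IsValid() failed: empty device list, or GPU/NPU requested without support
    }
    bool cpu_fp16 = inner.IsCpuEnabled() && inner.IsCpuFloat16Enabled();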
@@ -27,8 +27,28 @@ struct InnerContext : public Context {
struct ThreadPool *thread_pool_ = nullptr;

public:
InnerContext() = default;

explicit InnerContext(const Context *context);

int Init();

bool IsCpuFloat16Enabled();

bool IsGpuFloat16Enabled();

bool IsCpuEnabled();

bool IsGpuEnabled();

bool IsNpuEnabled();

CpuDeviceInfo GetCpuInfo();

GpuDeviceInfo GetGpuInfo();

int IsValid();

virtual ~InnerContext();
};
} // namespace mindspore::lite

@@ -117,8 +117,9 @@ kernel::LiteKernel *KernelRegistry::GetKernel(const std::vector<Tensor *> &in_te
if (creator != nullptr) {
auto kernel = creator(in_tensors, out_tensors, parameter, ctx, key, primitive);
if (kernel != nullptr) {
return kernel;
kernel->set_desc(key);
}
return kernel;
}
return nullptr;
}
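Note: the GetKernel change is one of the sub-graph segmentation fixes — previously the registry returned the created kernel without recording which KernelKey produced it; now set_desc(key) stamps the key first, so later passes can group kernels by desc(). A sketch of the consumer side (the grouping itself lives in the rewritten scheduler, not in this hunk):

    auto desc = kernel->desc();  // KernelKey stamped by the registry at creation
    bool cpu_fp16_kernel = desc.arch == kernel::kCPU && desc.data_type == kNumberTypeFloat16;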
@@ -16,8 +16,11 @@

#include "src/lite_kernel.h"
#include <algorithm>
#include "src/tensor.h"

namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

void *LiteKernel::workspace_ = nullptr;

@@ -54,7 +57,21 @@ int LiteKernel::DecOutTensorRefCount() {
return 0;
}

int LiteKernel::Prepare() {
int LiteKernel::FreeWorkTensor() const {
for (auto input_kernel : this->in_kernels()) {
MS_ASSERT(input_kernel != nullptr);
if (input_kernel->is_model_output()) {
continue;
}
auto ret = input_kernel->DecOutTensorRefCount();
if (0 != ret) {
MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << this->name() << " failed";
}
}
return RET_OK;
}

int LiteKernel::PreProcess() {
if (!InferShapeDone()) {
(const_cast<mindspore::lite::PrimitiveC *>(primitive_))->SetInferFlag(true);
auto ret = (const_cast<mindspore::lite::PrimitiveC *>(primitive_))->InferShape(in_tensors_, out_tensors_);
@@ -70,7 +87,7 @@ int LiteKernel::Prepare() {
}
}

auto &outputs = this->out_tensors();
auto outputs = this->out_tensors();
for (auto *output : outputs) {
MS_ASSERT(output != nullptr);
output->MallocData();
@@ -78,6 +95,50 @@ int LiteKernel::Prepare() {
return RET_OK;
}

int LiteKernel::Run(const KernelCallBack &before, const KernelCallBack &after) {
if (before != nullptr) {
if (!before(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_),
{this->name_, this->type_str()})) {
MS_LOG(WARNING) << "run kernel before_callback failed, name: " << this->name_;
}
}
auto ret = Run();
if (RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << this->name_;
return ret;
}
if (after != nullptr) {
if (!after(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_),
{this->name_, this->type_str()})) {
MS_LOG(ERROR) << "run kernel after_callback failed, name: " << this->name_;
}
}
return RET_OK;
}

std::string LiteKernel::ToString() const {
std::ostringstream oss;
oss << "LiteKernel: " << this->name_;
oss << ", Type: " << this->type_str();
oss << std::endl << this->in_tensors_.size() << " InputTensors:";
for (auto tensor : in_tensors_) {
oss << " " << tensor << ":" << tensor->ToString();
}
oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
for (auto tensor : out_tensors_) {
oss << " " << tensor << ":" << tensor->ToString();
}
oss << std::endl << this->in_kernels_.size() << " InputKernels:";
for (auto in_kernel : in_kernels_) {
oss << " " << in_kernel->name_;
}
oss << std::endl << this->out_kernels_.size() << " OutputKernels:";
for (auto out_kernel : out_kernels_) {
oss << " " << out_kernel->name_;
}
return oss.str();
}

std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputKernels(
const std::vector<kernel::LiteKernel *> &kernels) {
std::vector<kernel::LiteKernel *> input_kernels;
@@ -87,10 +148,11 @@ std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputKernels(
continue;
}
for (const auto &input : kernel->in_kernels()) {
auto iter = std::find(kernels.begin(), kernels.end(), input);
auto item = std::find(input_kernels.begin(), input_kernels.end(), kernel);
if (iter == kernels.end() && item == input_kernels.end()) {
auto in_kernel_in_graph = std::find(kernels.begin(), kernels.end(), input);
auto in_kernel_in_ret = std::find(input_kernels.begin(), input_kernels.end(), kernel);
if (in_kernel_in_graph == kernels.end() && in_kernel_in_ret == input_kernels.end()) {
input_kernels.emplace_back(kernel);
break;
}
}
}
@@ -106,10 +168,11 @@ std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphOutputKernels(
continue;
}
for (const auto &output : kernel->out_kernels()) {
auto iter = std::find(kernels.begin(), kernels.end(), output);
auto item = std::find(output_kernels.begin(), output_kernels.end(), kernel);
if (iter == kernels.end() && item == output_kernels.end()) {
auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output);
auto out_kernel_in_ret = std::find(output_kernels.begin(), output_kernels.end(), kernel);
if (out_kernel_in_graph == kernels.end() && out_kernel_in_ret == output_kernels.end()) {
output_kernels.emplace_back(kernel);
break;
}
}
}
@@ -120,7 +183,8 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphInputTensors(const std::vect
std::vector<lite::Tensor *> input_tensors;
std::vector<lite::Tensor *> all_output_tensors;
for (const auto &kernel : kernels) {
all_output_tensors.insert(all_output_tensors.end(), kernel->out_tensors().begin(), kernel->out_tensors().end());
auto kernel_out_tensors = kernel->out_tensors();
all_output_tensors.insert(all_output_tensors.end(), kernel_out_tensors.begin(), kernel_out_tensors.end());
}
std::vector<kernel::LiteKernel *> input_kernels = SubgraphInputKernels(kernels);
for (const auto &kernel : input_kernels) {
@@ -139,7 +203,8 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec
std::vector<lite::Tensor *> output_tensors;
std::vector<lite::Tensor *> all_input_tensors;
for (const auto &kernel : kernels) {
all_input_tensors.insert(all_input_tensors.end(), kernel->in_tensors().begin(), kernel->in_tensors().end());
auto kernel_in_tensors = kernel->in_tensors();
all_input_tensors.insert(all_input_tensors.end(), kernel_in_tensors.begin(), kernel_in_tensors.end());
}
std::vector<kernel::LiteKernel *> output_kernels = SubgraphOutputKernels(kernels);
for (const auto &kernel : output_kernels) {
@@ -153,8 +218,12 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec
return output_tensors;
}

void LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> &kernels) {
void LiteKernelUtil::InitIOKernels(std::vector<kernel::LiteKernel *> &kernels) {
for (auto *kernel : kernels) {
// clean io kernels
kernel->SetInKernel({});
kernel->SetOutKernel({});
// find io kernels
for (auto *search_kernel : kernels) {
if (search_kernel == kernel) {
continue;

@@ -19,6 +19,7 @@
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include "src/ops/primitive_c.h"
#include "src/common/utils.h"
#ifdef ENABLE_ARM
@@ -32,9 +33,7 @@
static constexpr int kPerTensor = 1;

namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
struct KernelKey {
KERNEL_ARCH arch;
TypeId data_type;
@@ -51,16 +50,17 @@ struct KernelKey {
}
};

enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSubGraph, kNpuSubGraph, kApuSubGraph };

class LiteKernel {
public:
LiteKernel() = default;
// parameter should be deleted or freed by caller, and should be deleted or freed after LiteKernel is deleted
LiteKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &in_tensors,
const std::vector<lite::Tensor *> &out_tensors, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive)
: op_parameter_(parameter),
in_tensors_(in_tensors),
out_tensors_(out_tensors),
in_tensors_(std::move(in_tensors)),
out_tensors_(std::move(out_tensors)),
primitive_(primitive),
context_(ctx) {
if (op_parameter_ != nullptr && ctx != nullptr) {
@@ -77,15 +77,22 @@ class LiteKernel {
}
}

virtual int Prepare();
// called while compiling graph
virtual int Prepare() { return mindspore::lite::RET_OK; }
// called before Run
virtual int PreProcess();

virtual int Init() { return -1; }
virtual int Run() { return mindspore::lite::RET_ERROR; }

virtual int ReSize() { return -1; }
virtual int Run(const KernelCallBack &before, const KernelCallBack &after);
// called after Run
virtual int PostProcess() { return FreeWorkTensor(); }

virtual int Run() { return -1; }
virtual int ReSize() { return mindspore::lite::RET_ERROR; }

std::string name() { return this->name_; }
virtual int Init() { return mindspore::lite::RET_ERROR; }

std::string name() const { return this->name_; }

virtual void train() { train_mode_ = true; }

@@ -101,20 +108,20 @@ class LiteKernel {

bool is_model_output() const { return this->is_model_output_; }

schema::PrimitiveType Type() {
schema::PrimitiveType Type() const {
return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_)
: schema::PrimitiveType_NONE;
}

std::string type_str() { return schema::EnumNamePrimitiveType(this->Type()); }
std::string type_str() const { return schema::EnumNamePrimitiveType(this->Type()); }

void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }

void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }

std::vector<lite::Tensor *> &in_tensors() { return this->in_tensors_; }
std::vector<lite::Tensor *> in_tensors() const { return this->in_tensors_; }

std::vector<lite::Tensor *> &out_tensors() { return this->out_tensors_; }
std::vector<lite::Tensor *> out_tensors() const { return this->out_tensors_; }

void AddInKernel(LiteKernel *kernel) {
if (!lite::IsContain(this->in_kernels_, kernel)) {
@@ -132,14 +139,16 @@ class LiteKernel {

void SetOutKernel(const std::vector<LiteKernel *> &kernel) { this->out_kernels_ = kernel; }

std::vector<LiteKernel *> &in_kernels() { return this->in_kernels_; }
std::vector<LiteKernel *> in_kernels() const { return this->in_kernels_; }

std::vector<LiteKernel *> &out_kernels() { return this->out_kernels_; }
std::vector<LiteKernel *> out_kernels() const { return this->out_kernels_; }

void InitOutTensorRefCount();

int DecOutTensorRefCount();

int FreeWorkTensor() const;

KernelKey desc() const { return desc_; }

void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; }

@@ -151,10 +160,14 @@ class LiteKernel {
static void FreeWorkspace();
void *GetWorkspace() { return workspace_; }

protected:
bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()) && true; }
SubGraphType subgraph_type() const { return this->subgraph_type_; }

KernelKey desc_;
virtual std::string ToString() const;

protected:
bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()); }

KernelKey desc_{};
std::string name_;
OpParameter *op_parameter_ = nullptr;
// tensor will free in ~lite_session()
@@ -168,27 +181,7 @@ class LiteKernel {
bool is_model_output_ = false;
size_t workspace_size_ = 0;
static void *workspace_;
};

class SubGraphKernel : public LiteKernel {
public:
explicit SubGraphKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &in_kernels,
const std::vector<kernel::LiteKernel *> &out_kernels,
const std::vector<kernel::LiteKernel *> &nodes, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(nullptr, inputs, outputs, ctx, primitive), nodes_(nodes) {
in_kernels_ = in_kernels;
out_kernels_ = out_kernels;
}

virtual int Init() { return -1; }
virtual int InferShape() { return -1; }
virtual int ReSize() { return -1; }
virtual int Run() { return -1; }

protected:
std::vector<LiteKernel *> nodes_;
SubGraphType subgraph_type_ = kNotSubGraph;
};

typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
@@ -198,7 +191,7 @@ typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,

class LiteKernelUtil {
public:
static void TopologicalSortKernels(std::vector<kernel::LiteKernel *> &kernels);
static void InitIOKernels(std::vector<kernel::LiteKernel *> &kernels);

static std::vector<kernel::LiteKernel *> SubgraphInputKernels(const std::vector<kernel::LiteKernel *> &kernels);
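Note: lite_kernel.h now pins the kernel lifecycle in the base class — Prepare() defaults to RET_OK, PreProcess()/PostProcess() bracket Run(), and the SubGraphKernel class leaves this header (a sub_graph_kernel.cc is added to LITE_SRC above). A hedged sketch of what a concrete kernel overrides after this change; the class and its body are illustrative only:

    class MyEltwiseKernel : public mindspore::kernel::LiteKernel {
     public:
      using LiteKernel::LiteKernel;  // reuse the (parameter, tensors, ctx, primitive) ctor
      int Init() override { return mindspore::lite::RET_OK; }    // one-time setup
      int ReSize() override { return mindspore::lite::RET_OK; }  // shape-dependent setup
      int Run() override {
        // compute out_tensors_[0] from in_tensors_; PreProcess() has already run
        // InferShape and MallocData, and PostProcess() will release work tensors
        return mindspore::lite::RET_OK;
      }
    };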
@@ -295,13 +295,13 @@ int LiteSession::CompileGraph(Model *model) {

std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const { return this->input_vec_; }

int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) {
int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
STATUS ret = RET_ERROR;
STATUS ret;
MS_ASSERT(this->context_);
if (before == nullptr && after == nullptr) {
ret = executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get());
@@ -325,39 +325,12 @@ int LiteSession::Init(Context *context) {
return RET_NULL_PTR;
}

if (context->device_list_.empty()) {
MS_LOG(ERROR) << "Device list is empty.";
is_running_.store(false);
return RET_NOT_SUPPORT;
}

auto &device_type = context->device_list_[0].device_type_;

if (device_type == DT_NPU) {
MS_LOG(ERROR) << "NPU is not supported.";
is_running_.store(false);
return RET_NOT_SUPPORT;
}
#ifndef SUPPORT_GPU
if (device_type == DT_GPU) {
MS_LOG(ERROR) << "GPU is not supported.";
is_running_.store(false);
return RET_NOT_SUPPORT;
}
#endif

this->context_ = new (std::nothrow) InnerContext();
this->context_ = new (std::nothrow) InnerContext(context);
if (this->context_ == nullptr) {
MS_LOG(ERROR) << "New Context failed";
is_running_.store(false);
return RET_MEMORY_FAILED;
}
this->context_->allocator = context->allocator;
this->context_->thread_num_ = context->thread_num_;
this->context_->device_list_.clear();
for (auto &device_ctx : context->device_list_) {
this->context_->device_list_.push_back(device_ctx);
}
auto ret = this->context_->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init Context failed";
@@ -371,12 +344,11 @@ int LiteSession::Init(Context *context) {
return ret;
}
#if SUPPORT_GPU
if (device_type == DT_GPU) {
auto gpu_device_info = this->context_->device_list_[0].device_info_.gpu_device_info_;
if (this->context_->IsGpuEnabled()) {
auto gpu_device_info = this->context_->GetGpuInfo();
auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
if (opencl_runtime->Init() != RET_OK) {
device_type = DT_CPU;
MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
} else {
MS_LOG(INFO) << "Init OpenCL runtime success.";
@@ -398,14 +370,13 @@ void LiteSession::BindThread(bool if_bind) {
MS_LOG(ERROR) << "Device list is empty.";
return;
}
auto &device_ctx = this->context_->device_list_[0];
if (device_ctx.device_type_ != DT_CPU) {
MS_LOG(ERROR) << "Device is not CPU.";
if (this->context_->IsCpuEnabled()) {
return;
}
if (device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_ != NO_BIND) {
auto cpu_device_info = this->context_->GetCpuInfo();
if (cpu_device_info.cpu_bind_mode_ != NO_BIND) {
MS_ASSERT(this->context_->thread_pool_ != NULL);
BindThreads(this->context_->thread_pool_, if_bind, device_ctx.device_info_.cpu_device_info_.cpu_bind_mode_);
BindThreads(this->context_->thread_pool_, if_bind, cpu_device_info.cpu_bind_mode_);
}
}

@@ -52,8 +52,7 @@ class LiteSession : public session::LiteSession {

mindspore::tensor::MSTensor *GetInputsByTensorName(const std::string &name) const override;

int RunGraph(const session::KernelCallBack &before = nullptr,
const session::KernelCallBack &after = nullptr) override;
int RunGraph(const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override;

std::vector<mindspore::tensor::MSTensor *> GetOutputsByNodeName(const std::string &node_name) const override;

@@ -163,11 +163,6 @@ int RunPriorBox(void *cdata, int task_id) {
}

int PriorBoxCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail! Ret error code[" << prepare_ret << "]";
return prepare_ret;
}
int error_code = ParallelLaunch(this->context_->thread_pool_, RunPriorBox, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]";

@@ -140,12 +140,6 @@ int QuantDTypeCastRun(void *cdata, int task_id) {
}

int QuantDTypeCastCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}

if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 &&
out_tensors_[0]->data_type() == TypeId::kNumberTypeFloat32) {
int8_ptr_ = reinterpret_cast<int8_t *>(in_tensors_[0]->data_c());

@@ -91,24 +91,18 @@ int StridedSliceCPUKernel::HandleMultiInputs() {
}

int StridedSliceCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}

auto input = in_tensors_.at(0);
auto output = out_tensors_.at(0);
MS_ASSERT(input);
MS_ASSERT(output);
if (in_tensors().size() == kMultiInputsSize) {
ret = HandleMultiInputs();
auto ret = HandleMultiInputs();
if (ret != RET_OK) {
return ret;
}
}
ret = DoStridedSlice(input->MutableData(), output->MutableData(),
reinterpret_cast<StridedSliceParameter *>(op_parameter_));
auto ret = DoStridedSlice(input->MutableData(), output->MutableData(),
reinterpret_cast<StridedSliceParameter *>(op_parameter_));
if (ret != RET_OK) {
MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]";
return RET_ERROR;

@@ -103,9 +103,9 @@ int ActivationFp16Run(void *cdata, int task_id) {
}

int ActivationFp16CPUKernel::Run() {
auto ret = Prepare();
auto ret = MallocTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
MS_LOG(ERROR) << "MallocTmpBuffer failed.";
return ret;
}

@@ -185,11 +185,6 @@ static int ArithmeticsRunFp16(void *cdata, int task_id) {
}

int ArithmeticFP16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
auto output_tensor = out_tensors_.at(0);
is_input0_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32;
is_input1_fp32_ = in_tensors_.at(1)->data_type() == kNumberTypeFloat32;
@@ -203,7 +198,7 @@ int ArithmeticFP16CPUKernel::Run() {
FreeTmpBuffer();
return RET_ERROR;
}
ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_);
auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]";
}

@@ -20,6 +20,8 @@
#include "nnacl/fp16/arithmetic_self_fp16.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

namespace mindspore::kernel {
namespace {
@@ -81,11 +83,6 @@ void ArithmeticSelfFp16CPUKernel::FreeInputAndOutput() {
}

int ArithmeticSelfFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail! ret: " << ret;
return ret;
}
auto input_tensor = in_tensors_.at(0);
auto output_tensor = out_tensors_.at(0);
input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, context_);
@@ -95,7 +92,7 @@ int ArithmeticSelfFp16CPUKernel::Run() {
MS_LOG(ERROR) << "input or output is nullptr";
return RET_ERROR;
}
ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
}

@@ -21,6 +21,8 @@
#include "src/kernel_registry.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_BatchNorm;

namespace mindspore::kernel {
@@ -47,11 +49,6 @@ int BatchnormFp16CPUKernel::InitConstTensor() {
}

int BatchnormFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret;
return ret;
}
auto input_tensor = in_tensors_.at(0);
auto output_tensor = out_tensors_.at(0);
input_ = ConvertInputFp32toFp16(input_tensor, context_);
@@ -62,7 +59,7 @@ int BatchnormFp16CPUKernel::Run() {
return RET_ERROR;
}

ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
}
@@ -76,7 +73,7 @@ int BatchnormFp16CPUKernel::Run() {
int BatchnormFp16CPUKernel::DoExecute(int task_id) {
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
BatchNormFp16(input_, mean_, variance_, param, task_id, output_);
return mindspore::lite::RET_OK;
return RET_OK;
}

void BatchnormFp16CPUKernel::FreeInputAndOutput() {

@@ -83,11 +83,6 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
}

int CastFp16CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
if (data_num_ == 0) {
return RET_OK;
}

@@ -91,12 +91,6 @@ void ConcatFp16CPUKernel::FreeTmpBuffer() {
}

int ConcatFp16CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}

auto ret = MallocTmpBuffer();
if (ret != RET_OK) {
FreeTmpBuffer();

@@ -218,13 +218,7 @@ static int Convolution1x1Fp16RunHw(void *cdata, int task_id) {
}

int Convolution1x1FP16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get executor tensor failed.";
return ret;
@@ -248,10 +242,14 @@ int Convolution1x1FP16CPUKernel::Run() {
}

if (multi_thread_by_hw_) {
ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
} else {
RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_);
ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed.";
return ret;
}
}

@@ -23,7 +23,8 @@
#include "src/runtime/runtime_api.h"

namespace mindspore::kernel {

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() {
if (fp16_weight_ != nullptr) {
free(fp16_weight_);

@@ -32,10 +32,10 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionBaseFP16CPUKernel() override;

int Init() override { return RET_OK; }
int ReSize() override { return RET_OK; }
int Run() override { return RET_OK; }
int RunImpl(int task_id) { return RET_OK; }
int Init() override { return mindspore::lite::RET_OK; }
int ReSize() override { return mindspore::lite::RET_OK; }
int Run() override { return mindspore::lite::RET_OK; }
int RunImpl(int task_id) { return mindspore::lite::RET_OK; }
virtual int GetExecuteTensor();
virtual int GetExecuteFilter();
virtual void IfCastOutput();

@@ -110,12 +110,7 @@ static int ConvDwFp16Run(void *cdata, int task_id) {
}

int ConvolutionDepthwiseFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
return ret;

@@ -140,12 +140,7 @@ static int ConvDwSWFp16Run(void *cdata, int task_id) {
}

int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
ret = InitBuffer();
auto ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
return RET_ERROR;

@@ -145,13 +145,7 @@ static int ConvolutionFp16Impl(void *cdata, int task_id) {
}

int ConvolutionFP16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
return ret;

@@ -211,12 +211,6 @@ static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) {
}

int ConvolutionWinogradFP16CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}

auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";

@@ -61,11 +61,6 @@ static int CropFp16Run(void *cdata, int task_id) {
}

int CropFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_);
if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "input or output is nullptr";
@@ -79,8 +74,11 @@ int CropFp16CPUKernel::Run() {
return RET_ERROR;
}

ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, thread_count_);

auto ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
return ret;
}
if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()),
out_tensors_.at(kOutputIndex)->ElementsNum());

@@ -156,12 +156,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
ret = InitBuffer();
auto ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
return RET_ERROR;

@@ -179,11 +179,6 @@ int DeConvolutionFp16CPUKernel::Init() {
}

int DeConvolutionFp16CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();

int error_code = InitRunBuf();

@@ -0,0 +1,36 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <arm_neon.h>

#ifdef __cplusplus
extern "C" {
#endif
#ifdef ENABLE_ARM64
extern void Float32ToFloat16(const float *input, float16_t *output, int number);
extern void Float16ToFloat32(const float16_t *input, float *output, int number);

void Float32ToFloat16_fp16_handler(const void *input, void *output, int number) {
Float32ToFloat16(reinterpret_cast<const float *>(input), reinterpret_cast<float16_t *>(output), number);
}
void Float16ToFloat32_fp16_handler(const void *input, void *output, int number) {
Float16ToFloat32(reinterpret_cast<const float16_t *>(input), reinterpret_cast<float *>(output), number);
}
#endif

#ifdef __cplusplus
}
#endif
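Note: the new file above exposes the ARM64 assembly converters through void *-typed C handlers so that callers need not see float16_t in their own headers. A hedged usage sketch (buffers are illustrative; an ENABLE_ARM64 build is assumed):

    float src[8] = {0.5f, 1.0f, 1.5f, 2.0f, 2.5f, 3.0f, 3.5f, 4.0f};
    uint16_t dst[8];  // raw fp16 storage on the caller's side
    Float32ToFloat16_fp16_handler(src, dst, 8);  // third argument: element count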
@ -179,11 +179,6 @@ int FcFP16Run(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int FullconnectionFP16CPUKernel::Run() {
|
||||
auto prepare_ret = Prepare();
|
||||
if (prepare_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
|
||||
return prepare_ret;
|
||||
}
|
||||
auto out_tensor = out_tensors_[0];
|
||||
if (out_tensor->data_type() == kNumberTypeFloat32) {
|
||||
output_ptr_ = output_fp16_;
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "src/kernel_registry.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_FusedBatchNorm;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
@ -70,11 +72,11 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
|
|||
context_->allocator->Free(mean_fp16);
|
||||
context_->allocator->Free(variance_fp16);
|
||||
context_->allocator->Free(output_fp16);
|
||||
return mindspore::lite::RET_OK;
|
||||
return RET_OK;
|
||||
}
|
||||
FusedBatchNormFp16(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id,
|
||||
out_tensors_.at(0)->MutableData());
|
||||
return mindspore::lite::RET_OK;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *CpuFusedBatchnormFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
|
|
|
@ -210,11 +210,6 @@ int MatmulFP16Run(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int MatmulFP16CPUKernel::Run() {
|
||||
auto prepare_ret = Prepare();
|
||||
if (prepare_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
|
||||
return prepare_ret;
|
||||
}
|
||||
auto out_tensor = out_tensors_[0];
|
||||
float16_t *c_ptr = nullptr;
|
||||
if (out_tensor->data_type() == kNumberTypeFloat32) {
|
||||
|
|
|
@ -33,11 +33,6 @@ int PadFp16CPUKernel::RunImpl(int task_id) {
|
|||
}
|
||||
|
||||
int PadFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
auto input_tensor = in_tensors_.at(0);
|
||||
auto output_tensor = out_tensors_.at(0);
|
||||
is_input_fp32_ = input_tensor->data_type() == kNumberTypeFloat32;
|
||||
|
@ -58,7 +53,7 @@ int PadFp16CPUKernel::Run() {
|
|||
output_[i] = pad_param_->constant_value_;
|
||||
}
|
||||
}
|
||||
ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -84,12 +84,6 @@ static int PoolingFp16Impl(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int PoolingFp16CPUKernel::Run() {
|
||||
auto prepare_ret = Prepare();
|
||||
if (prepare_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
|
||||
return prepare_ret;
|
||||
}
|
||||
|
||||
auto input_tensor = in_tensors_.at(kInputIndex);
|
||||
auto in_data_type_ = input_tensor->data_type();
|
||||
MS_ASSERT(in_data_type_ == kNumberTypeFloat32 || in_data_type_ == kNumberTypeFloat16);
|
||||
|
|
|
@ -76,12 +76,6 @@ static int ReduceFp16Impl(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int ReduceFp16CPUKernel::Run() {
|
||||
auto prepare_ret = Prepare();
|
||||
if (prepare_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
|
||||
return prepare_ret;
|
||||
}
|
||||
|
||||
auto ret = MallocTmpBuffer();
|
||||
if (ret != RET_OK) {
|
||||
FreeTmpBuffer();
|
||||
|
|
|
@ -31,11 +31,6 @@ using mindspore::schema::PrimitiveType_Reshape;
|
|||
namespace mindspore::kernel {
|
||||
|
||||
int ReshapeFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
auto in_tensor = in_tensors_.at(kInputIndex);
|
||||
auto out_tensor = out_tensors_.at(kOutputIndex);
|
||||
auto input_ptr = in_tensor->MutableData();
|
||||
|
|
|
@ -103,12 +103,7 @@ int ScaleFp16Run(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int ScaleFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
ret = InitScaleOffset();
|
||||
auto ret = InitScaleOffset();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "nnacl/fp16/slice_fp16.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Slice;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
@ -29,11 +31,6 @@ int SliceFp16CPUKernel::SliceParallelRun(int thread_id) {
|
|||
}
|
||||
|
||||
int SliceFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
input_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
|
||||
output_fp16_ = MallocOutputFp16(out_tensors_.at(0), context_);
|
||||
if (input_fp16_ == nullptr || output_fp16_ == nullptr) {
|
||||
|
@ -45,7 +42,7 @@ int SliceFp16CPUKernel::Run() {
|
|||
DoSliceFp16NoParallel(input_fp16_, output_fp16_, param_);
|
||||
return RET_OK;
|
||||
}
|
||||
ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
|
||||
}
|
||||
|
|
|
@ -104,12 +104,7 @@ void SoftmaxFp16CPUKernel::FreeTmpBuffer() {
|
|||
}
|
||||
|
||||
int SoftmaxFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = MallocTmpBuffer();
|
||||
auto ret = MallocTmpBuffer();
|
||||
if (ret != RET_OK) {
|
||||
FreeTmpBuffer();
|
||||
MS_LOG(ERROR) << "MallocTmpBuffer failed";
|
||||
|
|
|
@ -76,11 +76,6 @@ static int SplitFp16Run(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int SplitFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
|
||||
if (input_ptr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "input or output is nullptr";
|
||||
|
@ -94,7 +89,7 @@ int SplitFp16CPUKernel::Run() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
ret = ParallelLaunch(this->context_->thread_pool_, SplitFp16Run, this, thread_n_num_);
|
||||
auto ret = ParallelLaunch(this->context_->thread_pool_, SplitFp16Run, this, thread_n_num_);
|
||||
for (int i = 0; i < param->num_split_; i++) {
|
||||
if (out_tensors_.at(i)->data_type() == kNumberTypeFloat32) {
|
||||
Float16ToFloat32(output_ptr_[i], reinterpret_cast<float *>(out_tensors_.at(i)->MutableData()),
|
||||
|
|
|
@ -76,11 +76,6 @@ void StackFp16CPUKernel::FreeBuffer() {
|
|||
}
|
||||
|
||||
int StackFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
size_t inputs_num = in_tensors_.size();
|
||||
auto input0 = in_tensors_[0];
|
||||
if (inputs_num == 1) {
|
||||
|
@ -88,7 +83,7 @@ int StackFp16CPUKernel::Run() {
|
|||
return RET_OK;
|
||||
}
|
||||
InitMallocFlags();
|
||||
ret = MallocAssignBuffer();
|
||||
auto ret = MallocAssignBuffer();
|
||||
if (ret != RET_OK) {
|
||||
FreeBuffer();
|
||||
return ret;
|
||||
|
|
|
@ -128,11 +128,6 @@ static int TransposeFp16Run(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int TransposeFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
MS_ASSERT(in_tensors_.size() == TransposeInputNum);
|
||||
MS_ASSERT(out_tensors_.size() == TransposeOutputNum);
|
||||
auto &in_tensor = in_tensors_.front();
|
||||
|
@ -143,7 +138,7 @@ int TransposeFp16CPUKernel::Run() {
|
|||
}
|
||||
|
||||
// malloc when Run
|
||||
ret = MallocFp16Buffer();
|
||||
auto ret = MallocFp16Buffer();
|
||||
if (ret != RET_OK) {
|
||||
FreeFp16Buffer();
|
||||
return ret;
|
||||
|
|
|
@ -83,11 +83,6 @@ int ActivationRun(void *cdata, int task_id) {
|
|||
}
|
||||
|
||||
int ActivationCPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare failed.";
|
||||
return ret;
|
||||
}
|
||||
int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
|
||||
|
|
|
@ -55,11 +55,6 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) {
|
|||
}
|
||||
|
||||
int AddNCPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
|
||||
return ret;
|
||||
}
|
||||
elements_num_ = out_tensors_[0]->ElementsNum();
|
||||
auto input0_data = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
|
||||
auto input1_data = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
|
||||
|
@ -94,7 +89,7 @@ int AddNCPUKernel::Run() {
|
|||
in1_addr_ = input0_data;
|
||||
in2_addr_ = input1_data;
|
||||
out_addr_ = output_data;
|
||||
ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
|
|
|
@@ -45,12 +45,7 @@ int ArgMinMaxCPUKernel::Init() {
 int ArgMinMaxCPUKernel::ReSize() { return ArgMinMaxBaseCPUKernel::ReSize(); }
 
 int ArgMinMaxCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
-    return ret;
-  }
-  ret = ArgMinMaxBaseCPUKernel::Run();
+  auto ret = ArgMinMaxBaseCPUKernel::Run();
   return ret;
 }
 } // namespace mindspore::kernel
@@ -189,39 +189,38 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
     int out_count = MSMIN(stride, outside_ - stride * task_id);
     int out_thread_stride = stride * task_id;
     if (data_type_ == kDataTypeFloat) {
-      error_code =
-        BroadcastRun(reinterpret_cast<float *>(in_tensors_[0]->MutableData()),
-                     reinterpret_cast<float *>(in_tensors_[1]->MutableData()),
-                     reinterpret_cast<float *>(out_tensors_[0]->MutableData()), 0, out_count, out_thread_stride);
+      error_code = BroadcastRun(reinterpret_cast<float *>(in_tensors_[0]->data_c()),
+                                reinterpret_cast<float *>(in_tensors_[1]->data_c()),
+                                reinterpret_cast<float *>(out_tensors_[0]->data_c()), 0, out_count, out_thread_stride);
     } else {
-      error_code = BroadcastRun(
-        reinterpret_cast<int *>(in_tensors_[0]->MutableData()), reinterpret_cast<int *>(in_tensors_[1]->MutableData()),
-        reinterpret_cast<int *>(out_tensors_[0]->MutableData()), 0, out_count, out_thread_stride);
+      error_code = BroadcastRun(reinterpret_cast<int *>(in_tensors_[0]->data_c()),
+                                reinterpret_cast<int *>(in_tensors_[1]->data_c()),
+                                reinterpret_cast<int *>(out_tensors_[0]->data_c()), 0, out_count, out_thread_stride);
     }
 
   } else if (arithmetic_opt_run_ != nullptr) {  // no broadcast, one of input is scalar
     if (arithmeticParameter_->in_elements_num0_ == 1) {
       if (data_type_ == kDataTypeFloat) {
-        error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->MutableData()),
-                                         reinterpret_cast<float *>(in_tensors_[1]->MutableData()) + stride * task_id,
-                                         reinterpret_cast<float *>(out_tensors_[0]->MutableData()) + stride * task_id,
-                                         count, arithmeticParameter_);
+        error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->data_c()),
+                                         reinterpret_cast<float *>(in_tensors_[1]->data_c()) + stride * task_id,
+                                         reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count,
+                                         arithmeticParameter_);
       } else {
-        error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->MutableData()),
-                                             reinterpret_cast<int *>(in_tensors_[1]->MutableData()) + stride * task_id,
-                                             reinterpret_cast<int *>(out_tensors_[0]->MutableData()) + stride * task_id,
+        error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->data_c()),
+                                             reinterpret_cast<int *>(in_tensors_[1]->data_c()) + stride * task_id,
+                                             reinterpret_cast<int *>(out_tensors_[0]->data_c()) + stride * task_id,
                                              count, arithmeticParameter_);
       }
     } else if (arithmeticParameter_->in_elements_num1_ == 1) {
       if (data_type_ == kDataTypeFloat) {
-        error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->MutableData()) + stride * task_id,
-                                         reinterpret_cast<float *>(in_tensors_[1]->MutableData()),
-                                         reinterpret_cast<float *>(out_tensors_[0]->MutableData()) + stride * task_id,
-                                         count, arithmeticParameter_);
+        error_code = arithmetic_opt_run_(reinterpret_cast<float *>(in_tensors_[0]->data_c()) + stride * task_id,
+                                         reinterpret_cast<float *>(in_tensors_[1]->data_c()),
+                                         reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count,
+                                         arithmeticParameter_);
       } else {
-        error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->MutableData()) + stride * task_id,
-                                             reinterpret_cast<int *>(in_tensors_[1]->MutableData()),
-                                             reinterpret_cast<int *>(out_tensors_[0]->MutableData()) + stride * task_id,
+        error_code = arithmetic_opt_run_int_(reinterpret_cast<int *>(in_tensors_[0]->data_c()) + stride * task_id,
+                                             reinterpret_cast<int *>(in_tensors_[1]->data_c()),
+                                             reinterpret_cast<int *>(out_tensors_[0]->data_c()) + stride * task_id,
                                              count, arithmeticParameter_);
       }
     } else {
@@ -230,14 +229,13 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) {
     }
   } else {  // no broadcast, neither is scalar, two same shape
     if (data_type_ == kDataTypeFloat) {
-      error_code = arithmetic_run_(reinterpret_cast<float *>(in_tensors_[0]->MutableData()) + stride * task_id,
-                                   reinterpret_cast<float *>(in_tensors_[1]->MutableData()) + stride * task_id,
-                                   reinterpret_cast<float *>(out_tensors_[0]->MutableData()) + stride * task_id, count);
+      error_code = arithmetic_run_(reinterpret_cast<float *>(in_tensors_[0]->data_c()) + stride * task_id,
+                                   reinterpret_cast<float *>(in_tensors_[1]->data_c()) + stride * task_id,
+                                   reinterpret_cast<float *>(out_tensors_[0]->data_c()) + stride * task_id, count);
     } else {
-      error_code =
-        arithmetic_run_int_(reinterpret_cast<int *>(in_tensors_[0]->MutableData()) + stride * task_id,
-                            reinterpret_cast<int *>(in_tensors_[1]->MutableData()) + stride * task_id,
-                            reinterpret_cast<int *>(out_tensors_[0]->MutableData()) + stride * task_id, count);
+      error_code = arithmetic_run_int_(reinterpret_cast<int *>(in_tensors_[0]->data_c()) + stride * task_id,
+                                       reinterpret_cast<int *>(in_tensors_[1]->data_c()) + stride * task_id,
+                                       reinterpret_cast<int *>(out_tensors_[0]->data_c()) + stride * task_id, count);
     }
   }
   if (error_code != RET_OK) {
@@ -257,11 +255,6 @@ int ArithmeticsRun(void *cdata, int task_id) {
 }
 
 int ArithmeticCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
-    return ret;
-  }
   if (arithmeticParameter_->broadcasting_) {
     outside_ = 1;
     for (auto i = arithmeticParameter_->ndim_ - 1; i >= 0; --i) {
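The DoArithmetic hunks above swap MutableData() for data_c() on every hot-path tensor access. Here is a toy sketch of the assumed distinction and why it matters in a per-task worker: MutableData() is presumed to allocate the buffer lazily on first use, while data_c() simply returns the current pointer, so the worker tasks skip the allocation check (the real MindSpore Lite accessors may differ in detail; this Tensor is not the real one).

#include <cstdlib>
#include <iostream>

struct Tensor {
  void *data = nullptr;
  std::size_t size = 0;
  void *MutableData() {            // may allocate: fine at setup time
    if (data == nullptr) data = std::malloc(size);
    return data;
  }
  void *data_c() { return data; }  // raw pointer, no allocation side effect
};

int main() {
  Tensor t{nullptr, 16 * sizeof(float)};
  float *p = static_cast<float *>(t.MutableData());  // allocate once, up front
  for (int task = 0; task < 4; ++task) {
    // Hot path: per-task slice through the cheap accessor.
    float *chunk = static_cast<float *>(t.data_c()) + task * 4;
    for (int i = 0; i < 4; ++i) chunk[i] = 1.0f;
  }
  std::cout << p[0] << '\n';
  std::free(t.data);
  return 0;
}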
@@ -18,6 +18,8 @@
 #include "nnacl/fp32/arithmetic_self.h"
 
 using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
 namespace {
@@ -88,12 +90,7 @@ int ArithmeticSelfRun(void *cdata, int task_id) {
 }
 
 int ArithmeticSelfCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! ret: " << ret;
-    return ret;
-  }
-  ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
   }
@@ -38,11 +38,6 @@ int BatchToSpaceCPUKernel::Init() {
 int BatchToSpaceCPUKernel::ReSize() { return BatchToSpaceBaseCPUKernel::ReSize(); }
 
 int BatchToSpaceCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = in_tensors_[0];
   auto output = out_tensors_[0];
   const float *input_data = reinterpret_cast<const float *>(input->MutableData());
@@ -18,6 +18,8 @@
 #include "src/kernel_registry.h"
 
 using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BatchNorm;
 
 namespace mindspore::kernel {
@@ -70,12 +72,7 @@ int BatchnormCPUKernel::InitConstTensor() {
 }
 
 int BatchnormCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret;
-    return ret;
-  }
-  ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
@@ -85,7 +82,7 @@ int BatchnormCPUKernel::Run() {
 int BatchnormCPUKernel::DoExecute(int task_id) {
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
   BatchNormFp32(in_tensors_.at(0)->MutableData(), mean_, variance_, param, task_id, out_tensors_.at(0)->MutableData());
-  return mindspore::lite::RET_OK;
+  return RET_OK;
 }
 
 int BatchNormRun(void *cdata, int task_id) {
@@ -42,11 +42,6 @@ int BiasCPUKernel::ReSize() {
 }
 
 int BiasCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto in = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
   auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
   auto out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());

@@ -49,11 +49,6 @@ int BroadcastToCPUKernel::Init() {
 }
 
 int BroadcastToCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
   auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
 
@@ -66,15 +66,16 @@ int CastCPUKernel::DoCast(int thread_id) {
 
   auto offset = thread_id * stride_;
   auto output = out_tensors_.at(0);
-  auto output_data = output->MutableData();
+  auto output_data = output->data_c();
+  MS_ASSERT(output_data != nullptr);
   auto input_data_type = input->data_type();
   auto output_data_type = output->data_type();
   if (output_data_type != kNumberTypeFloat32) {
     if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) {
-      Float32ToInt32(reinterpret_cast<float *>(input->MutableData()) + offset,
+      Float32ToInt32(reinterpret_cast<float *>(input->data_c()) + offset,
                      reinterpret_cast<int32_t *>(output_data) + offset, data_num);
     } else if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeFloat16) {
-      Float32ToFp16(reinterpret_cast<float *>(input->MutableData()) + offset,
+      Float32ToFp16(reinterpret_cast<float *>(input->data_c()) + offset,
                     reinterpret_cast<uint16_t *>(output_data) + offset, data_num);
     } else {
       MS_LOG(ERROR) << "Unsupported datatype from " << input_data_type << " to " << output_data_type;
@@ -106,11 +107,6 @@ int CastCPUKernel::DoCast(int thread_id) {
 }
 
 int CastCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   if (data_num_ == 0) {
     return RET_OK;
   }
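The DoCast hunk also shows the per-task slicing convention used throughout these workers: each task converts one stride-sized slice, advancing the source and destination pointers by the same element offset. A small runnable sketch of that slicing, with a hand-rolled converter standing in for the nnacl Float32ToInt32 (illustrative only):

#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for the nnacl converter: cast num floats to int32.
void Float32ToInt32(const float *in, int32_t *out, int num) {
  for (int i = 0; i < num; ++i) out[i] = static_cast<int32_t>(in[i]);
}

int main() {
  std::vector<float> in{0.5f, 1.5f, 2.5f, 3.5f};
  std::vector<int32_t> out(in.size());
  int stride = 2;
  for (int task_id = 0; task_id < 2; ++task_id) {
    int offset = task_id * stride;  // same offset on both sides of the cast
    Float32ToInt32(in.data() + offset, out.data() + offset, stride);
  }
  std::cout << out[3] << '\n';  // prints 3
  return 0;
}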
@@ -75,11 +75,6 @@ int ConcatsRun(void *cdata, int task_id) {
 }
 
 int ConcatCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   int error_code = ParallelLaunch(this->context_->thread_pool_, ConcatsRun, this, thread_count_);
   return error_code;
 }

@@ -52,11 +52,6 @@ int ConstantOfShapeRun(void *cdata, int task_id) {
 }
 
 int ConstantOfShapeCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   param_->element_sz_ = out_tensors_.front()->ElementsNum();
   int thread_num = MSMIN(param_->op_parameter_.thread_num_, param_->element_sz_);
   param_->unit_ = UP_DIV(param_->element_sz_, thread_num);
@@ -141,12 +141,6 @@ int ConvolutionImpl(void *cdata, int task_id) {
 }
 
 int ConvolutionCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
-
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";

@@ -194,11 +194,6 @@ int Convolution1x1RunHw(void *cdata, int task_id) {
 }
 
 int Convolution1x1CPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto src_in = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
   auto src_out = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
 
@@ -101,19 +101,13 @@ int ConvDwRun(void *cdata, int task_id) {
 }
 
 int ConvolutionDepthwiseCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return ret;
-  }
-
   auto input_tensor = in_tensors_.at(kInputIndex);
   input_ptr_ = reinterpret_cast<float *>(input_tensor->MutableData());
 
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->MutableData());
 
-  ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
     return RET_ERROR;

@@ -134,12 +134,7 @@ int ConvDwSWRun(void *cdata, int task_id) {
 }
 
 int ConvolutionDepthwiseSWCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return ret;
-  }
-  ret = InitBuffer();
+  auto ret = InitBuffer();
   if (ret != 0) {
     MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
     return RET_ERROR;
@@ -219,12 +219,6 @@ int ConvolutionWinogradImpl(void *cdata, int task_id) {
 }
 
 int ConvolutionWinogradCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
-
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";

@@ -53,11 +53,6 @@ int CropCPUKernel::CropParallelRun(int thread_id) {
 }
 
 int CropCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = in_tensors_[0];
   auto output = out_tensors_[0];
   auto param = reinterpret_cast<CropParameter *>(op_parameter_);

@@ -196,11 +196,6 @@ int DeConvolutionCPUKernel::InitRunBuf() {
 }
 
 int DeConvolutionCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   float *src_in = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
   float *src_out = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
 
@@ -151,13 +151,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     return RET_ERROR;
   }
 
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
-    return ret;
-  }
-
-  ret = InitBuffer();
+  auto ret = InitBuffer();
   if (ret != 0) {
     MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret;
     return ret;

@@ -47,11 +47,6 @@ int DepthToSpaceCPUKernel::Init() {
 int DepthToSpaceCPUKernel::ReSize() { return DepthToSpaceBaseCPUKernel::ReSize(); }
 
 int DepthToSpaceCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = in_tensors_[0];
   auto output = out_tensors_[0];
   const float *input_data = reinterpret_cast<const float *>(input->MutableData());

@@ -66,11 +66,6 @@ DetectionPostProcessCPUKernel::~DetectionPostProcessCPUKernel() {
 int DetectionPostProcessCPUKernel::ReSize() { return RET_OK; }
 
 int DetectionPostProcessCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input_boxes = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
   auto input_scores = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
 

@@ -57,11 +57,6 @@ int EluRun(void *cdata, int task_id) {
 }
 
 int EluCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   input_addr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
 
@@ -72,12 +72,6 @@ int EmbeddingLookupRun(void *cdata, int task_id) {
 }
 
 int EmbeddingLookupCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
-
   MS_ASSERT(context_->allocator != nullptr);
   input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
     sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));

@@ -69,11 +69,6 @@ int ExpRun(void *cdata, int task_id) {
 }
 
 int ExpCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   input_addr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();

@@ -77,11 +77,6 @@ int ExpandDimsRun(void *cdata, int task_id) {
 }
 
 int ExpandDimsCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   in_ptr_ = in_tensors_.at(0)->MutableData();
   out_ptr_ = out_tensors_.at(0)->MutableData();
   auto ret = ParallelLaunch(this->context_->thread_pool_, ExpandDimsRun, this, thread_sz_count_);

@@ -67,11 +67,6 @@ int FillRun(void *cdata, int task_id) {
 }
 
 int FillCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto fillData = in_tensors_.at(in_tensors_.size() - 1);
   auto output = out_tensors_.front();
   auto fill_data = reinterpret_cast<float *>(fillData->MutableData());

@@ -44,11 +44,6 @@ int FlattenCPUKernel::ReSize() {
 }
 
 int FlattenCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
   auto output = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
   Flatten(input, output, flatten_param_);

@@ -162,11 +162,6 @@ int FullconnectionCPUKernel::DoMatmul(int task_id) {
 }
 
 int FullconnectionCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto a_ptr = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
   auto b_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
   c_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
@@ -18,6 +18,8 @@
 #include "src/kernel_registry.h"
 
 using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_FusedBatchNorm;
 
 namespace mindspore::kernel {
@@ -84,11 +86,6 @@ int FusedBatchnormCPUKernel::InitConstTensor() {
 }
 
 int FusedBatchnormCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret;
-    return ret;
-  }
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
   if (is_train() && in_tensors_.size() >= 5) {
     float *in = static_cast<float *>(in_tensors_[0]->MutableData());
@@ -108,7 +105,7 @@ int FusedBatchnormCPUKernel::Run() {
     memcpy(offset_, bias, in_tensors_[2]->Size());
     trained_ = true;  // trained at least once
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
@@ -137,7 +134,7 @@ int FusedBatchnormCPUKernel::DoExecute(int task_id) {
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
   FusedBatchNormFp32(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id,
                      out_tensors_.at(0)->MutableData());
-  return mindspore::lite::RET_OK;
+  return RET_OK;
 }
 
 kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::Tensor *> &inputs,
@@ -91,12 +91,6 @@ int GatherRun(void *cdata, int task_id) {
 }
 
 int GatherCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
-
   auto indices_tensor = in_tensors_.at(1);
   int indices_num = indices_tensor->ElementsNum();
   bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32;

@@ -116,11 +116,6 @@ int GatherNdRun(void *cdata, int task_id) {
 }
 
 int GatherNdCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdRun, this, thread_sz_count_);
@@ -141,17 +141,12 @@ int L2NormTrailingAxisRun(void *cdata, int task_id) {
 }
 
 int L2NormCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! ret: " << ret;
-    return ret;
-  }
   auto input_shape = in_tensors().at(kInputIndex)->shape();
   input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData());
   output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
   if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) {
     // all axis
-    ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_);
+    auto ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
@@ -167,7 +162,7 @@ int L2NormCPUKernel::Run() {
       return RET_ERROR;
     }
   } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) {
-    ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_);
+    auto ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
@@ -74,11 +74,6 @@ int LocalResponseNormRun(void *cdata, int task_id) {
 }
 
 int LocalResponseNormCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   int error_code = ParallelLaunch(this->context_->thread_pool_, LocalResponseNormRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]";

@@ -147,11 +147,6 @@ int LstmCPUKernel::ReSize() {
 }
 
 int LstmCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = in_tensors_.at(kInputIndex);
   MS_ASSERT(input != nullptr);
   auto hidden_state = in_tensors_.at(4);

@@ -281,11 +281,6 @@ int MatmulFloatRun(void *cdata, int task_id) {
 }
 
 int MatmulCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto a_src = reinterpret_cast<float *>(in_tensors_[0]->data_c());
   auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c());
   auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c());
@@ -28,11 +28,6 @@ int Nchw2NhwcCPUKernel::Init() { return RET_OK; }
 int Nchw2NhwcCPUKernel::ReSize() { return RET_OK; }
 
 int Nchw2NhwcCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = in_tensors_[0];
   auto output = out_tensors_[0];
 

@@ -28,11 +28,6 @@ int Nhwc2NchwCPUKernel::Init() { return RET_OK; }
 int Nhwc2NchwCPUKernel::ReSize() { return RET_OK; }
 
 int Nhwc2NchwCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto input = in_tensors_[0];
   auto output = out_tensors_[0];
 

@@ -161,11 +161,6 @@ int OneHotCPUKernel::GetParams() {
 }
 
 int OneHotCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   int error_code = ParallelLaunch(this->context_->thread_pool_, RunOneHot, this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]";

@@ -227,12 +227,6 @@ int PadCPUKernel::HandleMirrorPad() {
 }
 
 int PadCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
-
   int error_code;
   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
     auto output = out_tensors_.at(0);

@@ -84,11 +84,6 @@ int PoolingImpl(void *cdata, int task_id) {
 }
 
 int PoolingCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingImpl, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";

@@ -41,11 +41,6 @@ int PowerImpl(void *cdata, int task_id) {
 }
 
 int PowerCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImpl, this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PowerCPUKernel error: " << ret;

@@ -107,11 +107,6 @@ int PReluCPUKernel::ProcessShareChannelInput() {
 }
 
 int PReluCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   MS_ASSERT(in_shape.size() >= 2);
   auto input_tensor = in_tensors_[0];
   ori_input_ = reinterpret_cast<float *>(input_tensor->MutableData());

@@ -32,11 +32,6 @@ int RangeCPUKernel::Init() { return RET_OK; }
 int RangeCPUKernel::ReSize() { return RET_OK; }
 
 int RangeCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   size_t start = (reinterpret_cast<RangeParameter *>(op_parameter_))->start_;
   size_t limit = (reinterpret_cast<RangeParameter *>(op_parameter_))->limit_;
   size_t delta = (reinterpret_cast<RangeParameter *>(op_parameter_))->delta_;

@@ -32,11 +32,6 @@ int RankCPUKernel::Init() { return RET_OK; }
 int RankCPUKernel::ReSize() { return RET_OK; }
 
 int RankCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
   auto in_shape = in_tensors_[0]->shape();
   auto rank = in_shape.size();
@@ -113,11 +113,6 @@ int ReduceImpl(void *cdata, int task_id) {
 }
 
 int ReduceCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
-    return prepare_ret;
-  }
   if (in_tensors().at(0)->data_type() == kNumberTypeFloat32) {
     data_type_ = kDataTypeFloat;
   } else {
@@ -129,8 +124,8 @@ int ReduceCPUKernel::Run() {
     return ret;
   }
 
-  src_data_ = in_tensors_.at(0)->MutableData();
-  PreProcess();
+  src_data_ = in_tensors_.at(0)->data_c();
+  HandleASumAndSumSquare();
   for (size_t i = 0; i < static_cast<size_t>(num_axes_); ++i) {
     if (i != static_cast<size_t>(num_axes_ - 1)) {
       dst_data_ = data_buffers_[i];
@@ -159,12 +154,12 @@ int ReduceCPUKernel::Run() {
   return RET_OK;
 }
 
-void ReduceCPUKernel::PreProcess() {
+void ReduceCPUKernel::HandleASumAndSumSquare() {
   if (data_type_ == kDataTypeInt) {
     return;
   }
   int num = in_tensors_.at(0)->ElementsNum();
-  float *data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
+  float *data = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
   if (data == nullptr) {
     return;
  }
@@ -65,7 +65,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
   int MallocTmpBuffer();
   void FreeTmpBuffer();
   int CalculateCoeffOutput();
-  void PreProcess();
+  void HandleASumAndSumSquare();
 };
 } // namespace mindspore::kernel
 
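The rename from PreProcess to HandleASumAndSumSquare makes the helper's job explicit, and the header hunk keeps the declaration in sync with the definition. A sketch of what such a pre-pass is presumed to do (illustrative only, not the MindSpore Lite implementation): ReduceASum and ReduceSumSquare can reuse a plain Sum reduction if the input is first mapped through |x| or x*x respectively.

#include <cmath>
#include <iostream>
#include <vector>

enum class ReduceMode { Sum, ASum, SumSquare };

// Pre-transform the input so a plain Sum produces the requested reduction.
void HandleASumAndSumSquare(ReduceMode mode, std::vector<float> &data) {
  for (auto &x : data) {
    if (mode == ReduceMode::ASum) x = std::fabs(x);
    else if (mode == ReduceMode::SumSquare) x = x * x;
  }
}

int main() {
  std::vector<float> v{-1.f, 2.f, -3.f};
  HandleASumAndSumSquare(ReduceMode::ASum, v);
  float sum = 0.f;
  for (float x : v) sum += x;   // plain Sum now yields the ASum result
  std::cout << sum << '\n';     // prints 6
  return 0;
}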
@@ -36,11 +36,6 @@ int ReshapeCPUKernel::Init() {
 int ReshapeCPUKernel::ReSize() { return RET_OK; }
 
 int ReshapeCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
-    return ret;
-  }
   auto input_ptr = in_tensors_.at(kInputIndex)->MutableData();
   auto output_ptr = out_tensors_.at(kOutputIndex)->MutableData();
   size_t data_size = in_tensors_.at(kInputIndex)->Size();

@@ -204,11 +204,6 @@ int ResizeCPUKernel::RunImpl(int task_id) {
 }
 
 int ResizeCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
-  }
   int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeImpl, this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";

@@ -125,14 +125,9 @@ int ReverseCPUKernel::DoReverse(int task_id) {
 }
 
 int ReverseCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare failed.";
-    return RET_ERROR;
-  }
   in_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
-  ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]";
     return ret;

@@ -87,11 +87,6 @@ int ReverseSequenceCPUKernel::ReSize() {
 }
 
 int ReverseSequenceCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
-    return ret;
-  }
   float *input0 = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
   void *input1 = in_tensors_.at(1)->MutableData();
   float *output = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());

@@ -93,15 +93,10 @@ int ROIPoolingRun(void *cdata, int task_id) {
 }
 
 int ROIPoolingCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! ret: " << ret;
-    return ret;
-  }
   in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   roi_ptr_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
-  ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]";
     return ret;
@@ -174,11 +174,6 @@ int ScaleRun(void *cdata, int task_id) {
 }
 
 int ScaleCPUKernel::Run() {
-  auto ret = Prepare();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
-    return ret;
-  }
   auto in_tensor = in_tensors_.front();
   input_ptr_ = reinterpret_cast<float *>(in_tensor->data_c());
   if (!scale_param_->const_scale_) {
@@ -193,7 +188,7 @@ int ScaleCPUKernel::Run() {
   auto out_tensor = out_tensors_.front();
   output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData());
 
-  ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;