forked from mindspore-Ecosystem/mindspore
!4556 [MS][LITE]add fp16 split
Merge pull request !4556 from 张学同/to_merge
This commit is contained in: 5f27ff4afe
@@ -0,0 +1,143 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/fp16/split_fp16.h"
#include "src/runtime/kernel/arm/fp16/split_fp16.h"
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/runtime/kernel/arm/nnacl/split.h"
#include "src/runtime/kernel/arm/nnacl/split_parameter.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Split;

namespace mindspore::kernel {

int SplitFp16CPUKernel::Init() {
  auto ret = SplitBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }

  output_ptr_.resize(param->num_split_);

  if (!InferShapeDone()) {
    return RET_OK;
  }

  return ReSize();
}

int SplitFp16CPUKernel::ReSize() { return SplitBaseCPUKernel::ReSize(); }

int SplitFp16CPUKernel::Split(int task_id) {
  int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
  if (num_unit_thread <= 0) {
    return RET_OK;
  }
  int thread_offset = task_id * thread_n_stride_;
  auto ret = DoSplitFp16(input_ptr_, output_ptr_.data(), in_tensors_.front()->shape().data(), thread_offset,
                         num_unit_thread, param);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Split error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

int SplitRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto g_kernel = reinterpret_cast<SplitFp16CPUKernel *>(cdata);
  auto ret = g_kernel->Split(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "SplitRun error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

int SplitFp16CPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return RET_ERROR;
  }
  auto in_tensor = in_tensors_.front();
  if (in_tensor->data_type() == kNumberTypeFloat32) {
    input_ptr_ =
      reinterpret_cast<float16_t *>(context_->allocator->Malloc(in_tensor->ElementsNum() * sizeof(float16_t)));
    Float32ToFloat16(reinterpret_cast<float *>(in_tensor->Data()), input_ptr_, in_tensor->ElementsNum());
  } else {
    input_ptr_ = reinterpret_cast<float16_t *>(in_tensor->Data());
  }
  for (int i = 0; i < param->num_split_; i++) {
    if (in_tensor->data_type() == kNumberTypeFloat32) {
      output_ptr_[i] = reinterpret_cast<float16_t *>(
        context_->allocator->Malloc(out_tensors_.at(i)->ElementsNum() * sizeof(float16_t)));
      Float32ToFloat16(reinterpret_cast<float *>(out_tensors_.at(i)->Data()), output_ptr_[i],
                       out_tensors_.at(i)->ElementsNum());
    } else {
      output_ptr_[i] = reinterpret_cast<float16_t *>(out_tensors_.at(i)->Data());
    }
  }
  ret = LiteBackendParallelLaunch(SplitRun, this, thread_n_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "split error error_code[" << ret << "]";
    return RET_ERROR;
  }
  if (in_tensor->data_type() == kNumberTypeFloat32) {
    context_->allocator->Free(input_ptr_);
    input_ptr_ = nullptr;
  }
  for (int i = 0; i < param->num_split_; i++) {
    // Free the temporary fp16 output buffers only when they were allocated above for fp32 outputs.
    if (in_tensor->data_type() == kNumberTypeFloat32) {
      context_->allocator->Free(output_ptr_[i]);
      output_ptr_[i] = nullptr;
    }
  }
  return RET_OK;
}

kernel::LiteKernel *CpuSplitFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
                                              OpParameter *opParameter, const Context *ctx,
                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Input opParameter is nullptr!";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_Split);
  auto *kernel = new (std::nothrow) SplitFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new SplitFp16CPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Split, CpuSplitFp16KernelCreator)

}  // namespace mindspore::kernel
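For reference, here is a minimal standalone sketch of the per-thread work partitioning that Split(task_id) above relies on. It is not MindSpore code: num_unit_, thread_n_num_ and thread_n_stride_ are set up by SplitBaseCPUKernel, and the concrete values below (8 units, 3 threads, a ceiling-division stride) are illustrative assumptions only.

#include <algorithm>
#include <cstdio>

// Standalone illustration of how the split work is divided across tasks.
// Hypothetical values: 8 units of work shared by 3 launched tasks.
int main() {
  const int num_unit = 8;      // total slices to copy (num_unit_)
  const int thread_n_num = 3;  // number of tasks launched (thread_n_num_)
  // Assumed to be a ceiling division in the base class; illustrative only.
  const int thread_n_stride = (num_unit + thread_n_num - 1) / thread_n_num;

  for (int task_id = 0; task_id < thread_n_num; ++task_id) {
    // Same clamp as in SplitFp16CPUKernel::Split: the last task may get fewer units.
    int num_unit_thread = std::min(thread_n_stride, num_unit - task_id * thread_n_stride);
    if (num_unit_thread <= 0) continue;
    int thread_offset = task_id * thread_n_stride;
    std::printf("task %d handles units [%d, %d)\n", task_id, thread_offset, thread_offset + num_unit_thread);
  }
  return 0;
}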
@@ -0,0 +1,45 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SPLIT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SPLIT_H_
#include <arm_neon.h>

#include <vector>
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/lite_kernel.h"

namespace mindspore::kernel {
class SplitFp16CPUKernel : public SplitBaseCPUKernel {
 public:
  SplitFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                     const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                     const lite::Primitive *primitive)
      : SplitBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~SplitFp16CPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;
  int Split(int task_id);

 private:
  float16_t *input_ptr_;
  std::vector<float16_t *> output_ptr_;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SPLIT_H_
@@ -0,0 +1,59 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/fp16/split_fp16.h"
#include "nnacl/split_parameter.h"
#include <string.h>
#include "nnacl/errorcode.h"

int DoSplitFp16(float16_t *in_data, float16_t **out_data, const int *input_shape, int offset, int num_unit,
                SplitParameter *split_param) {
  if (in_data == NULL || out_data == NULL) {
    return NNACL_ERR;
  }
  int num_split = split_param->num_split_;
  int *split_sizes = split_param->split_sizes_;
  int *strides = split_param->strides_;
  int split_dim = split_param->split_dim_;
  int in_stride = strides[split_dim];

  float16_t *src;
  int size_float = (int)(sizeof(float16_t));
  int in_stride_bytes = in_stride * size_float;

  int split_which;
  int split_times;
  int stride_per_split = in_stride * input_shape[split_dim];

  split_which = offset % num_split;
  split_times = offset / num_split;
  src = in_data + split_times * stride_per_split;

  for (int i = 0; i < split_which; i++) {
    src += split_sizes[i] * in_stride;
  }

  for (int i = offset; i < offset + num_unit; i++) {
    split_which = i % num_split;
    split_times = i / num_split;
    int split_size = split_sizes[split_which];
    float16_t *dst = out_data[split_which] + split_times * in_stride * split_size;
    (void)memcpy(dst, src, split_size * in_stride_bytes);
    src += split_size * in_stride;
  }

  return NNACL_OK;
}
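DoSplitFp16 walks the input in "units": unit i is the slice that goes to output i % num_split at outer position i / num_split, so consecutive units rotate through the outputs. Below is a portable sketch of that indexing, not the nnacl code itself: it uses float instead of float16_t so it builds without arm_neon.h, starts at offset 0 (the single-thread case, skipping the initial src positioning), and the 2x4 shape with split sizes {1, 3} along axis 1 is an illustrative assumption.

#include <cstdio>
#include <cstring>
#include <vector>

// Portable re-statement of the DoSplitFp16 copy loop (float instead of float16_t).
// Splits a 2x4 tensor along axis 1 into pieces of size {1, 3}.
int main() {
  const float in_data[2 * 4] = {0, 1, 2, 3, 4, 5, 6, 7};
  const int input_shape[2] = {2, 4};
  const int num_split = 2;
  const int split_sizes[2] = {1, 3};
  const int in_stride = 1;  // strides_[split_dim]: product of the dims after the split axis (none here)

  std::vector<float> out0(2 * 1), out1(2 * 3);
  float *out_data[2] = {out0.data(), out1.data()};

  const int num_unit = input_shape[0] * num_split;  // one unit per (outer index, output) pair
  const float *src = in_data;                       // offset 0, i.e. the single-thread case
  for (int i = 0; i < num_unit; ++i) {
    int split_which = i % num_split;  // which output receives this unit
    int split_times = i / num_split;  // which slice of that output it is
    int split_size = split_sizes[split_which];
    float *dst = out_data[split_which] + split_times * in_stride * split_size;
    std::memcpy(dst, src, split_size * in_stride * sizeof(float));
    src += split_size * in_stride;
  }

  std::printf("out0: %g %g\n", out0[0], out0[1]);  // 0 4
  std::printf("out1: %g %g %g %g %g %g\n", out1[0], out1[1], out1[2], out1[3], out1[4], out1[5]);  // 1 2 3 5 6 7
  return 0;
}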
@@ -0,0 +1,33 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SPLITFP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SPLITFP16_H_

#include <arm_neon.h>
#include "nnacl/op_base.h"
#include "nnacl/split_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
int DoSplitFp16(float16_t *in_data, float16_t **out_data, const int *input_shape, int offset, int num_unit,
                SplitParameter *split_param);
#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SPLITFP16_H_