!4556 [MS][LITE]add fp16 split

Merge pull request !4556 from 张学同/to_merge
This commit is contained in:
mindspore-ci-bot 2020-08-17 14:40:42 +08:00 committed by Gitee
commit 5f27ff4afe
4 changed files with 280 additions and 0 deletions

View File

@ -0,0 +1,143 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/fp16/split_fp16.h"
#include "src/runtime/kernel/arm/fp16/split_fp16.h"
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/runtime/kernel/arm/nnacl/split.h"
#include "src/runtime/kernel/arm/nnacl/split_parameter.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Split;
namespace mindspore::kernel {
// Runs the base-class initialisation, sizes the per-output pointer table to
// param->num_split_ entries, and then resizes immediately if shape inference
// has already completed. Returns the first non-RET_OK status encountered.
int SplitFp16CPUKernel::Init() {
  const auto base_status = SplitBaseCPUKernel::Init();
  if (base_status != RET_OK) {
    return base_status;
  }

  // One fp16 destination pointer per split output; filled in by Run().
  output_ptr_.resize(param->num_split_);

  // Without inferred shapes there is nothing to size yet; ReSize() is deferred.
  return InferShapeDone() ? ReSize() : RET_OK;
}
// The fp16 kernel has no resize work of its own: it forwards to the shared
// base implementation and returns that status unchanged.
int SplitFp16CPUKernel::ReSize() {
  const auto base_status = SplitBaseCPUKernel::ReSize();
  return base_status;
}
int SplitFp16CPUKernel::Split(int task_id) {
int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
if (num_unit_thread <= 0) {
return RET_OK;
}
int thread_offset = task_id * thread_n_stride_;
auto ret = DoSplitFp16(input_ptr_, output_ptr_.data(), in_tensors_.front()->shape().data(), thread_offset,
num_unit_thread, param);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Split error task_id[" << task_id << "] error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
}
int SplitRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
auto g_kernel = reinterpret_cast<SplitFp16CPUKernel *>(cdata);
auto ret = g_kernel->Split(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SplitRun error task_id[" << task_id << "] error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
}
int SplitFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
auto in_tensor = in_tensors_.front();
if (in_tensor->data_type() == kNumberTypeFloat32) {
input_ptr_ =
reinterpret_cast<float16_t *>(context_->allocator->Malloc(in_tensor->ElementsNum() * sizeof(float16_t)));
Float32ToFloat16(reinterpret_cast<float *>(in_tensor->Data()), input_ptr_, in_tensor->ElementsNum());
} else {
input_ptr_ = reinterpret_cast<float16_t *>(in_tensor->Data());
}
for (int i = 0; i < param->num_split_; i++) {
if (in_tensor->data_type() == kNumberTypeFloat32) {
output_ptr_[i] = reinterpret_cast<float16_t *>(
context_->allocator->Malloc(out_tensors_.at(i)->ElementsNum() * sizeof(float16_t)));
Float32ToFloat16(reinterpret_cast<float *>(out_tensors_.at(i)->Data()), output_ptr_[i],
out_tensors_.at(i)->ElementsNum());
} else {
output_ptr_[i] = reinterpret_cast<float16_t *>(out_tensors_.at(i)->Data());
}
}
ret = LiteBackendParallelLaunch(SplitRun, this, thread_n_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "split error error_code[" << ret << "]";
return RET_ERROR;
}
if (in_tensor->data_type() == kNumberTypeFloat32) {
context_->allocator->Free(input_ptr_);
input_ptr_ = nullptr;
}
for (int i = 0; i < param->num_split_; i++) {
if (in_tensor->data_type() == kNumberTypeFloat32) {
context_->allocator->Free(output_ptr_[i]);
output_ptr_[i] = nullptr;
}
return RET_OK;
}
}
// Factory for the fp16 CPU Split kernel.
//
// Allocates a SplitFp16CPUKernel for the given tensors and parameter, then
// initialises it. Returns the initialised kernel, or nullptr (after logging)
// when opParameter is null, the allocation fails, or Init() does not return
// RET_OK. On Init() failure the partially constructed kernel is deleted.
kernel::LiteKernel *CpuSplitFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
                                              OpParameter *opParameter, const Context *ctx,
                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Input opParameter is nullptr!";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_Split);
  auto *kernel = new (std::nothrow) SplitFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new SplitFp16CPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    // Log before destroying the kernel: the message reads opParameter, and the
    // kernel may release that parameter in its destructor
    // (NOTE(review): ownership is not visible here - confirm in LiteKernel).
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}
// Associates CpuSplitFp16KernelCreator with the (kCPU, kNumberTypeFloat16, PrimitiveType_Split) key.
// NOTE(review): presumably this registers the creator with the kernel registry at static-init time - confirm in
// src/kernel_registry.h.
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Split, CpuSplitFp16KernelCreator)
} // namespace mindspore::kernel

View File

@ -0,0 +1,45 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SPLIT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SPLIT_H_
#include <arm_neon.h>
#include <vector>
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/lite_kernel.h"
namespace mindspore::kernel {
// Half-precision (fp16) CPU kernel for the Split operator. Construction is
// forwarded to SplitBaseCPUKernel; this class adds the fp16 buffer pointers
// that Run() sets up and Split() consumes.
class SplitFp16CPUKernel : public SplitBaseCPUKernel {
 public:
  SplitFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                     const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                     const lite::Primitive *primitive)
      : SplitBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~SplitFp16CPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;
  // Splits the units assigned to parallel task `task_id`.
  int Split(int task_id);

 private:
  // fp16 view of the input used by the current Run(). Initialised to nullptr
  // so the member never holds an indeterminate value before Run() assigns it.
  float16_t *input_ptr_ = nullptr;
  // One fp16 destination pointer per split output, indexed by output number.
  std::vector<float16_t *> output_ptr_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SPLIT_H_

View File

@ -0,0 +1,59 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp16/split_fp16.h"
#include "nnacl/split_parameter.h"
#include <string.h>
#include "nnacl/errorcode.h"
/*
 * Copies `num_unit` consecutive split units, starting at unit index `offset`,
 * from a half-precision input buffer into the per-output buffers.
 *
 * Unit i belongs to output (i % num_split_) and to repetition
 * (i / num_split_). Each unit is a contiguous run of
 * split_sizes_[output] * strides_[split_dim_] elements in the input.
 *
 * in_data     - source buffer.
 * out_data    - array of destination buffers, one per split output.
 * input_shape - input dimensions; only input_shape[split_dim_] is read.
 * offset      - index of the first unit to copy.
 * num_unit    - number of units to copy.
 * split_param - supplies num_split_, split_sizes_, strides_ and split_dim_.
 *
 * Returns NNACL_OK on success. Returns NNACL_ERR when any pointer argument is
 * NULL, when num_split_ is not positive (it is used as a divisor), or when
 * offset is negative (it would produce a negative array index).
 */
int DoSplitFp16(float16_t *in_data, float16_t **out_data, const int *input_shape, int offset, int num_unit,
                SplitParameter *split_param) {
  if (in_data == NULL || out_data == NULL || input_shape == NULL || split_param == NULL) {
    return NNACL_ERR;
  }
  int num_split = split_param->num_split_;
  if (num_split <= 0 || offset < 0) {
    return NNACL_ERR;
  }
  int *split_sizes = split_param->split_sizes_;
  int *strides = split_param->strides_;
  int split_dim = split_param->split_dim_;
  int in_stride = strides[split_dim];
  int in_stride_bytes = in_stride * (int)(sizeof(float16_t));
  /* Number of input elements covered by one full pass over all outputs. */
  int stride_per_split = in_stride * input_shape[split_dim];

  /* Position the source at the first requested unit: skip whole repetitions,
   * then skip the outputs that precede it within its repetition. */
  int split_which = offset % num_split;
  int split_times = offset / num_split;
  float16_t *src = in_data + split_times * stride_per_split;
  for (int i = 0; i < split_which; i++) {
    src += split_sizes[i] * in_stride;
  }

  /* Units are laid out back to back in the input, so the source pointer just
   * advances by each unit's length while the destination is recomputed. */
  for (int i = offset; i < offset + num_unit; i++) {
    split_which = i % num_split;
    split_times = i / num_split;
    int split_size = split_sizes[split_which];
    float16_t *dst = out_data[split_which] + split_times * in_stride * split_size;
    (void)memcpy(dst, src, split_size * in_stride_bytes);
    src += split_size * in_stride;
  }
  return NNACL_OK;
}

View File

@ -0,0 +1,33 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SPLITFP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SPLITFP16_H_
#include <arm_neon.h>
#include "nnacl/op_base.h"
#include "nnacl/split_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Copies `num_unit` consecutive split units, starting at unit index `offset`,
 * from the half-precision buffer `in_data` into the destination buffers in
 * `out_data` (one buffer per split output).
 *
 * Unit i is written to output (i % num_split_); the number of splits, the
 * per-output sizes, the strides and the split dimension are read from
 * `split_param`, and the extent of the split dimension from `input_shape`.
 *
 * Returns NNACL_OK on success and NNACL_ERR when a required pointer is NULL.
 */
int DoSplitFp16(float16_t *in_data, float16_t **out_data, const int *input_shape, int offset, int num_unit,
SplitParameter *split_param);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SPLITFP16_H_