!11661 tile range fp16

From: @zhaozhenlong
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2021-01-29 09:38:59 +08:00 committed by Gitee
commit 56623b07b3
11 changed files with 341 additions and 140 deletions

View File

@ -0,0 +1,60 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/base/tile_base.h"
#include <string.h>
/* Copies one contiguous run of (size * data_size) bytes from input_data into
 * output_data, repeated `multiple` times back to back. This is the innermost
 * step of the byte-oriented tile operation. */
void DoCopyData(const uint8_t *input_data, uint8_t *output_data, size_t size, size_t data_size, size_t multiple) {
  const size_t chunk_bytes = size * data_size;
  uint8_t *dst = output_data;
  size_t copies = multiple;
  while (copies-- > 0) {
    (void)memcpy(dst, input_data, chunk_bytes);
    dst += chunk_bytes;
  }
}
// Recursively tiles dimension `dim`: the innermost dimension is bulk-copied
// `multiples_[dim]` times; every outer dimension recurses once per
// (element, repeat) pair. Always returns 0 (no error paths).
int DoTileOneDimension(uint8_t *input_data, uint8_t *output_data, size_t dim, TileParameter *parameter) {
  size_t src_dim_size = parameter->in_shape_[dim];
  // Innermost dimension: the remaining data is one contiguous run.
  if (dim == parameter->in_dim_ - 1) {
    DoCopyData(input_data, output_data, src_dim_size, parameter->data_size_, parameter->multiples_[dim]);
    return 0;
  }
  for (size_t i = 0; i < src_dim_size; ++i) {
    for (size_t j = 0; j < parameter->multiples_[dim]; ++j) {
      size_t in_pos = parameter->in_strides_[dim] * i;
      // Repeat j of element i lands j * src_dim_size elements further along
      // this dimension's (larger) output stride.
      size_t out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size);
      // Strides are in elements; scale by data_size_ to obtain byte offsets.
      DoTileOneDimension(input_data + in_pos * parameter->data_size_, output_data + out_pos * parameter->data_size_,
                         dim + 1, parameter);
    }
  }
  return 0;
}
// Tiles input_data into output_data per `parameter`, starting the recursion
// at the outermost dimension. Buffers are treated as raw bytes so any element
// type described by parameter->data_size_ works.
void Tile(void *input_data, void *output_data, TileParameter *parameter) {
  uint8_t *in_bytes = (uint8_t *)input_data;
  uint8_t *out_bytes = (uint8_t *)output_data;
  DoTileOneDimension(in_bytes, out_bytes, 0, parameter);
}
// Fast path for tiling along exactly one dimension: each outer row in
// [begin, end) is replicated fast_multiple_ times into the output buffer.
void TileSimple(void *input_data, void *output_data, size_t begin, size_t end, TileParameter *parameter) {
  uint8_t *base_in = input_data;
  uint8_t *base_out = output_data;
  const size_t row_bytes = parameter->fast_stride_ * parameter->data_size_;
  for (size_t row = begin; row < end; ++row) {
    uint8_t *src = base_in + row * row_bytes;
    uint8_t *dst = base_out + row * row_bytes * parameter->fast_multiple_;
    for (size_t rep = 0; rep < parameter->fast_multiple_; ++rep) {
      (void)memcpy(dst, src, row_bytes);
      dst += row_bytes;
    }
  }
}

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_TILE_H_
#define MINDSPORE_LITE_NNACL_TILE_H_
#ifndef MINDSPORE_LITE_NNACL_BASE_TILE_H_
#define MINDSPORE_LITE_NNACL_BASE_TILE_H_
#include "nnacl/op_base.h"
@ -33,14 +33,19 @@ typedef struct TileParameter {
// other parameter
int in_dim_;
size_t data_size_;
size_t fast_outer_size_;
size_t fast_stride_;
size_t fast_multiple_;
} TileParameter;
#ifdef __cplusplus
extern "C" {
#endif
void Tile(float *input_data, float *output_data, TileParameter *parameter);
void Tile(void *input_data, void *output_data, TileParameter *parameter);
void TileSimple(void *input_data, void *output_data, size_t begin, size_t end, TileParameter *parameter);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_TILE_H_
#endif // MINDSPORE_LITE_NNACL_BASE_TILE_H_

View File

@ -1,46 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/tile_fp32.h"
#include <string.h>
// Appends `multiple` back-to-back copies of the `size`-float run at
// input_data into output_data.
void DoCopyData(const float *input_data, float *output_data, size_t size, size_t multiple) {
  const size_t run_bytes = size * sizeof(float);
  float *dst = output_data;
  for (size_t copy = 0; copy < multiple; ++copy) {
    (void)memcpy(dst, input_data, run_bytes);
    dst += size;
  }
}
// Recursively tiles dimension `dim` of the fp32 tensor: the innermost
// dimension is bulk-copied `multiples_[dim]` times; outer dimensions recurse
// once per (element, repeat) pair. Always returns 0.
int DoTileOneDimension(float *input_data, float *output_data, size_t dim, TileParameter *parameter) {
  size_t src_dim_size = parameter->in_shape_[dim];
  // Innermost dimension: contiguous run, delegate to the memcpy-based copy.
  if (dim == parameter->in_dim_ - 1) {
    DoCopyData(input_data, output_data, src_dim_size, parameter->multiples_[dim]);
    return 0;
  }
  for (size_t i = 0; i < src_dim_size; ++i) {
    for (size_t j = 0; j < parameter->multiples_[dim]; ++j) {
      size_t in_pos = parameter->in_strides_[dim] * i;
      // Repeat j of element i starts j * src_dim_size elements further along
      // this dimension's output stride. Strides are in float elements.
      size_t out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size);
      DoTileOneDimension(input_data + in_pos, output_data + out_pos, dim + 1, parameter);
    }
  }
  return 0;
}
// Entry point: tiles the whole fp32 tensor, starting at dimension 0.
void Tile(float *input_data, float *output_data, TileParameter *parameter) {
  const size_t outermost_dim = 0;
  DoTileOneDimension(input_data, output_data, outermost_dim, parameter);
}

View File

@ -17,7 +17,7 @@
#include "src/ops/tile.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/tile_fp32.h"
#include "nnacl/base/tile_base.h"
namespace mindspore {
namespace lite {

View File

@ -133,7 +133,7 @@ int Squeeze::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> out
}
}
outputs_.front()->set_shape(out_shape);
return 0;
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@ -0,0 +1,153 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/base/tile_base.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Tile;
namespace mindspore::kernel {
namespace {
constexpr size_t kDoubleInputsSize = 2;
}
// Defers sizing work until shape inference has completed; otherwise sizes now.
int TileCPUKernel::Init() { return InferShapeDone() ? ReSize() : RET_OK; }
// Fills `strides` with row-major strides (in elements) for `shape`:
// strides[ndim-1] == 1, each outer stride is the product of the inner dims.
void TileCPUKernel::ComputeStrides(const int *shape, int *strides, int ndim) {
  int accumulated = 1;
  for (int dim = ndim; dim-- > 0;) {
    strides[dim] = accumulated;
    accumulated *= shape[dim];
  }
}
// Refreshes TileParameter from the current tensor shapes: optionally reads
// the repeat multiples from a second input tensor, copies in/out shapes,
// recomputes element strides, resolves the element byte size from the input
// data type, and probes the single-dimension fast path.
// Returns RET_OK on success, RET_ERROR on invalid multiples or data type.
int TileCPUKernel::ReSize() {
  auto tile_parameter_ = reinterpret_cast<TileParameter *>(op_parameter_);
  MS_ASSERT(tile_parameter_);
  if (in_tensors_.size() == kDoubleInputsSize) {
    if (in_tensors_[1]->ElementsNum() > static_cast<int>(in_tensors_[0]->shape().size())) {
      MS_LOG(ERROR) << "tile's input1 data_num cannot be larger than input0's shape_size.";
      // BUG FIX: was `return false`, which converts to 0 == RET_OK and made
      // callers treat this failure as success.
      return RET_ERROR;
    }
    auto input1_addr = reinterpret_cast<int *>(in_tensors_[1]->data_c());
    for (int i = 0; i < in_tensors_[1]->ElementsNum(); ++i) {
      tile_parameter_->dims_[i] = i;
      tile_parameter_->multiples_[i] = input1_addr[i];
    }
  }
  tile_parameter_->in_dim_ = in_tensors_.at(0)->shape().size();
  for (int i = 0; i < tile_parameter_->in_dim_; ++i) {
    tile_parameter_->in_shape_[i] = in_tensors_.at(0)->shape().at(i);
    tile_parameter_->out_shape_[i] = out_tensors_.at(0)->shape().at(i);
  }
  ComputeStrides(tile_parameter_->in_shape_, tile_parameter_->in_strides_, tile_parameter_->in_dim_);
  ComputeStrides(tile_parameter_->out_shape_, tile_parameter_->out_strides_, tile_parameter_->in_dim_);
  auto data_type = in_tensors_.at(0)->data_type();
  if (data_type == kNumberTypeFloat32 || data_type == kNumberTypeInt32) {
    // fp32 and int32 share the same 4-byte element size.
    tile_parameter_->data_size_ = sizeof(float);
  } else if (data_type == kNumberTypeFloat16) {
    // float16_t is not visible from this base file; its size is 2 bytes.
    tile_parameter_->data_size_ = sizeof(float) / 2;
  } else {
    MS_LOG(ERROR) << "tile not support data type: " << data_type;
    return RET_ERROR;
  }
  FillOneDimTileParam();
  return RET_OK;
}
int SimpleTile(void *cdata, int task_id) {
auto kernel = reinterpret_cast<TileCPUKernel *>(cdata);
auto ret = kernel->SimpleTileImpl(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SimpleTile error task_id[" << task_id << "] error_code[" << ret << "]";
return ret;
}
return RET_OK;
}
// Detects whether exactly one dimension has a multiple > 1. If so, caches
// fast_multiple_/fast_stride_/fast_outer_size_ so Run() can use the parallel
// single-dimension fast path (TileSimple) instead of the generic recursion.
void TileCPUKernel::FillOneDimTileParam() {
  auto tile_parameter_ = reinterpret_cast<TileParameter *>(op_parameter_);
  MS_ASSERT(tile_parameter_);
  int large_one_multiple_count = 0;
  // FIX: initialize — these were indeterminate when no dimension qualified
  // (they are only meaningful when large_one_multiple_count == 1).
  int multiple = 0;
  int mul_index = 0;
  for (int i = 0; i < tile_parameter_->in_dim_; ++i) {
    if (tile_parameter_->multiples_[i] > 1) {
      large_one_multiple_count++;
      multiple = tile_parameter_->multiples_[i];
      mul_index = i;
    }
  }
  one_dim_tile_ = large_one_multiple_count == 1;
  if (one_dim_tile_) {
    tile_parameter_->fast_multiple_ = static_cast<size_t>(multiple);
    // Number of elements covered by one repeat unit of the tiled dimension.
    tile_parameter_->fast_stride_ =
      static_cast<size_t>(tile_parameter_->in_shape_[mul_index] * tile_parameter_->in_strides_[mul_index]);
    tile_parameter_->fast_outer_size_ =
      static_cast<size_t>(in_tensors_.at(0)->ElementsNum()) / tile_parameter_->fast_stride_;
  }
}
int TileCPUKernel::SimpleTileImpl(int task_id) {
auto param = reinterpret_cast<TileParameter *>(op_parameter_);
MS_ASSERT(param);
size_t unit = UP_DIV(param->fast_outer_size_, static_cast<size_t>(context_->thread_num_));
if (unit == 0 && task_id > 0) {
return RET_OK;
}
size_t begin = unit * static_cast<size_t>(task_id);
size_t end = MSMIN(begin + unit, param->fast_outer_size_);
TileSimple(input_addr_, output_addr_, begin, end, param);
return RET_OK;
}
int TileCPUKernel::RunSimpleTile() {
auto ret = ParallelLaunch(context_->thread_pool_, SimpleTile, this, context_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]";
return ret;
}
return RET_OK;
}
int TileCPUKernel::Run() {
input_addr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(0)->data_c());
output_addr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(0)->data_c());
MS_ASSERT(input_addr_ != nullptr);
MS_ASSERT(output_addr_ != nullptr);
if (one_dim_tile_) {
return RunSimpleTile();
}
Tile(input_addr_, output_addr_, reinterpret_cast<TileParameter *>(op_parameter_));
return RET_OK;
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Tile, LiteKernelCreator<TileCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Tile, LiteKernelCreator<TileCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Tile, LiteKernelCreator<TileCPUKernel>)
} // namespace mindspore::kernel

View File

@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TILE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TILE_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_TILE_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_TILE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/fp32/tile_fp32.h"
#include "nnacl/base/tile_base.h"
namespace mindspore::kernel {
class TileCPUKernel : public LiteKernel {
@ -32,10 +32,16 @@ class TileCPUKernel : public LiteKernel {
int Init() override;
int ReSize() override;
int Run() override;
int SimpleTileImpl(int task_id);
private:
int RunSimpleTile();
void ComputeStrides(const int *shape, int *strides, int ndim);
void FillOneDimTileParam();
bool one_dim_tile_;
uint8_t *input_addr_;
uint8_t *output_addr_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TILE_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_TILE_BASE_H_

View File

@ -38,7 +38,21 @@ int TransposeFp16CPUKernel::Init() {
}
int TransposeFp16CPUKernel::Run() {
MS_ASSERT(in_tensors_.size() == 1);
MS_ASSERT(in_tensors_.size() == 1 || in_tensors_.size() == 2);
TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
if (in_tensors_.size() == 2) {
auto input_perm = in_tensors_.at(1);
MS_ASSERT(input_perm != nullptr);
MS_ASSERT(input_perm->data_c() != nullptr);
int *perm_data = reinterpret_cast<int *>(input_perm->data_c());
for (int i = 0; i < input_perm->ElementsNum(); ++i) {
param->perm_[i] = perm_data[i];
}
for (int i = input_perm->ElementsNum(); i < 8; ++i) {
param->perm_[i] = 0;
}
param->num_axes_ = input_perm->ElementsNum();
}
MS_ASSERT(out_tensors_.size() == 1);
auto &in_tensor = in_tensors_.front();
auto &out_tensor = out_tensors_.front();
@ -51,7 +65,6 @@ int TransposeFp16CPUKernel::Run() {
MS_ASSERT(in_data_fp16_);
MS_ASSERT(out_data_fp16_);
TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
if (in_tensor->shape().size() != static_cast<size_t>(param->num_axes_)) {
memcpy(out_data_fp16_, in_data_fp16_, in_tensor->ElementsNum() * sizeof(float16_t));
return RET_OK;

View File

@ -68,6 +68,7 @@ int RangeCPUKernel::Run() {
return RET_OK;
}
// fp16 may not be necessary because it involves small amount of data (input 3 number, output depends on input)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Range, LiteKernelCreator<RangeCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeFloat, PrimitiveType_Range, LiteKernelCreator<RangeCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Range, LiteKernelCreator<RangeCPUKernel>)

View File

@ -1,80 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/tile_fp32.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Tile;
namespace mindspore::kernel {
namespace {
constexpr size_t kDoubleInputsSize = 2;
}
// Sizing only makes sense once shapes are inferred; otherwise wait for the
// framework to call ReSize later.
int TileCPUKernel::Init() {
  if (InferShapeDone()) {
    return ReSize();
  }
  return RET_OK;
}
// Computes row-major strides (in elements): innermost dimension has stride 1.
void TileCPUKernel::ComputeStrides(const int *shape, int *strides, int ndim) {
  int running = 1;
  for (int d = ndim - 1; d >= 0; --d) {
    strides[d] = running;
    running *= shape[d];
  }
}
int TileCPUKernel::ReSize() {
auto tile_parameter_ = reinterpret_cast<TileParameter *>(op_parameter_);
MS_ASSERT(tile_parameter_);
if (in_tensors_.size() == kDoubleInputsSize) {
if (in_tensors_[1]->ElementsNum() > static_cast<int>(in_tensors_[0]->shape().size())) {
MS_LOG(ERROR) << "tile's input1 data_num cannot be larger than input0's shape_size.";
return false;
}
auto input1_addr = reinterpret_cast<int *>(in_tensors_[1]->data_c());
for (int i = 0; i < in_tensors_[1]->ElementsNum(); ++i) {
tile_parameter_->dims_[i] = i;
tile_parameter_->multiples_[i] = input1_addr[i];
}
}
tile_parameter_->in_dim_ = in_tensors_.at(0)->shape().size();
for (int i = 0; i < tile_parameter_->in_dim_; ++i) {
tile_parameter_->in_shape_[i] = in_tensors_.at(0)->shape().at(i);
tile_parameter_->out_shape_[i] = out_tensors_.at(0)->shape().at(i);
}
ComputeStrides(tile_parameter_->in_shape_, tile_parameter_->in_strides_, tile_parameter_->in_dim_);
ComputeStrides(tile_parameter_->out_shape_, tile_parameter_->out_strides_, tile_parameter_->in_dim_);
return RET_OK;
}
// Runs the generic recursive tile over the fp32 input buffer.
int TileCPUKernel::Run() {
  auto input_addr = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
  auto output_addr = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
  // NOTE(review): MS_ASSERT is compiled out in release builds, so a null
  // tensor buffer would be dereferenced inside Tile — confirm upstream
  // guarantees the buffers are allocated before Run.
  MS_ASSERT(input_addr);
  MS_ASSERT(output_addr);
  Tile(input_addr, output_addr, reinterpret_cast<TileParameter *>(op_parameter_));
  return RET_OK;
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Tile, LiteKernelCreator<TileCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Tile, LiteKernelCreator<TileCPUKernel>)
} // namespace mindspore::kernel

View File

@ -14,10 +14,9 @@
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include "common/common_test.h"
#include "mindspore/lite/nnacl/fp32/tile_fp32.h"
#include "mindspore/lite/nnacl/base/tile_base.h"
#include "mindspore/lite/src/kernel_registry.h"
namespace mindspore {
@ -68,4 +67,94 @@ TEST_F(TestTileFp32, Tile) {
in_tensor.set_data(nullptr);
out_tensor.set_data(nullptr);
}
// Tiles a 2x2 fp32 tensor with multiples {2, 1}: the tensor is repeated twice
// along dim 0, yielding a 4x2 output. Goes through the kernel registry with
// thread_num_ = 2 so the parallel one-dim fast path (TileSimple) is exercised.
TEST_F(TestTileFp32, SimpleTile1) {
  lite::Tensor in_tensor(kNumberTypeFloat32, {2, 2});
  lite::Tensor out_tensor(kNumberTypeFloat32, {4, 2});
  // Stack-backed buffers; ownership stays with the test — see the
  // set_data(nullptr) calls at the end that detach them before destruction.
  float input_data[] = {1, 2, 3, 4};
  float output_data[8] = {0};
  in_tensor.set_data(input_data);
  out_tensor.set_data(output_data);
  std::vector<lite::Tensor *> inputs = {&in_tensor};
  std::vector<lite::Tensor *> outputs = {&out_tensor};
  // Pre-filled parameter: strides are row-major strides in elements.
  TileParameter parameter = {0};
  parameter.in_dim_ = 2;
  parameter.in_shape_[0] = 2;
  parameter.in_shape_[1] = 2;
  parameter.multiples_[0] = 2;
  parameter.multiples_[1] = 1;
  parameter.in_strides_[0] = 2;
  parameter.in_strides_[1] = 1;
  parameter.out_strides_[0] = 2;
  parameter.out_strides_[1] = 1;
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Tile};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  EXPECT_NE(creator, nullptr);
  auto ctx = std::make_shared<lite::InnerContext>();
  ASSERT_EQ(lite::RET_OK, ctx->Init());
  auto context = ctx.get();
  // Two threads forces the ParallelLaunch split inside SimpleTileImpl.
  context->thread_num_ = 2;
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), context, desc, nullptr);
  EXPECT_NE(kernel, nullptr);
  // NOTE(review): `kernel` is never deleted — verify against the ownership
  // convention used by the other kernel unit tests.
  auto ret = kernel->Run();
  EXPECT_EQ(0, ret);
  // Whole 2x2 block repeated twice along dim 0.
  float expect[] = {1, 2, 3, 4, 1, 2, 3, 4};
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(output_data[i], expect[i]);
  }
  // Detach the stack buffers so the tensors don't try to free them.
  in_tensor.set_data(nullptr);
  out_tensor.set_data(nullptr);
}
// Tiles a 2x2 fp32 tensor with multiples {1, 2}: each row is repeated twice
// along dim 1, yielding a 2x4 output. Same registry/threading setup as
// SimpleTile1 but with the tiled dimension being the innermost one.
TEST_F(TestTileFp32, SimpleTile2) {
  lite::Tensor in_tensor(kNumberTypeFloat32, {2, 2});
  lite::Tensor out_tensor(kNumberTypeFloat32, {2, 4});
  // Stack-backed buffers detached via set_data(nullptr) before destruction.
  float input_data[] = {1, 2, 3, 4};
  float output_data[8] = {0};
  in_tensor.set_data(input_data);
  out_tensor.set_data(output_data);
  std::vector<lite::Tensor *> inputs = {&in_tensor};
  std::vector<lite::Tensor *> outputs = {&out_tensor};
  // Row-major element strides; output stride 4 reflects the widened rows.
  TileParameter parameter = {0};
  parameter.in_dim_ = 2;
  parameter.in_shape_[0] = 2;
  parameter.in_shape_[1] = 2;
  parameter.multiples_[0] = 1;
  parameter.multiples_[1] = 2;
  parameter.in_strides_[0] = 2;
  parameter.in_strides_[1] = 1;
  parameter.out_strides_[0] = 4;
  parameter.out_strides_[1] = 1;
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Tile};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  EXPECT_NE(creator, nullptr);
  auto ctx = std::make_shared<lite::InnerContext>();
  ASSERT_EQ(lite::RET_OK, ctx->Init());
  auto context = ctx.get();
  // Two threads forces the ParallelLaunch split inside SimpleTileImpl.
  context->thread_num_ = 2;
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), context, desc, nullptr);
  EXPECT_NE(kernel, nullptr);
  // NOTE(review): `kernel` is never deleted — verify against the ownership
  // convention used by the other kernel unit tests.
  auto ret = kernel->Run();
  EXPECT_EQ(0, ret);
  // Each row {1,2} and {3,4} repeated twice along dim 1.
  float expect[] = {1, 2, 1, 2, 3, 4, 3, 4};
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(output_data[i], expect[i]);
  }
  // Detach the stack buffers so the tensors don't try to free them.
  in_tensor.set_data(nullptr);
  out_tensor.set_data(nullptr);
}
} // namespace mindspore