forked from mindspore-Ecosystem/mindspore
!4461 [MS][LITE][Develop]crop support parallel
Merge pull request !4461 from chenjianping/lite_dev
This commit is contained in:
commit
2d22cbc335
|
@ -48,8 +48,7 @@ int CropCPUKernel::CropParallelRun(int thread_id) {
|
|||
float *input_data = reinterpret_cast<float *>(input->Data());
|
||||
float *output_data = reinterpret_cast<float *>(output->Data());
|
||||
auto param = reinterpret_cast<CropParameter *>(op_parameter_);
|
||||
param->thread_id_ = thread_id;
|
||||
Crop4D(input_data, output_data, input->shape().data(), output->shape().data(), param);
|
||||
Crop4D(input_data, output_data, input->shape().data(), output->shape().data(), param, thread_id);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -26,7 +26,6 @@ typedef struct CropParameter {
|
|||
OpParameter op_parameter_;
|
||||
CropQuantArg quant_arg;
|
||||
int thread_count_;
|
||||
int thread_id_;
|
||||
int offset_size_;
|
||||
int64_t offset_[CROP_OFFSET_MAX_SIZE];
|
||||
int64_t in_offset_[CROP_OFFSET_MAX_SIZE];
|
||||
|
|
|
@ -30,7 +30,8 @@ void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
|
|||
}
|
||||
}
|
||||
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param) {
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param,
|
||||
int thread_id) {
|
||||
int64_t offset_pad[DIMENSION_4D];
|
||||
Pad4DOffset(crop_param, offset_pad);
|
||||
int out_shape1 = out_shape[1];
|
||||
|
@ -44,7 +45,6 @@ void Crop4D(const float *input, float *output, const int *in_shape, const int *o
|
|||
size_t in_stride0 = in_stride1 * in_shape[1];
|
||||
size_t copy_size = out_shape3 * sizeof(float);
|
||||
size_t count_per_thread = UP_DIV(out_shape1, crop_param->op_parameter_.thread_num_);
|
||||
int thread_id = crop_param->thread_id_;
|
||||
size_t thread_stride = thread_id * count_per_thread;
|
||||
for (int i = 0; i < out_shape[0]; ++i) {
|
||||
size_t out_offset0 = i * out_stride0;
|
||||
|
|
|
@ -23,7 +23,8 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param);
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param,
|
||||
int thread_id);
|
||||
void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
|
||||
CropParameter *crop_param);
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
#include "common/common_test.h"
|
||||
#include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h"
|
||||
#include "mindspore/lite/src/runtime/kernel/arm/fp32/crop.h"
|
||||
|
||||
namespace mindspore {
|
||||
class CropTestFp32 : public mindspore::CommonTest {
|
||||
|
@ -36,8 +37,7 @@ TEST_F(CropTestFp32, CropTest1) {
|
|||
crop_param.offset_[1] = 1;
|
||||
crop_param.offset_[2] = 1;
|
||||
crop_param.op_parameter_.thread_num_ = 1;
|
||||
crop_param.thread_id_ = 0;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
@ -60,8 +60,7 @@ TEST_F(CropTestFp32, CropTest2) {
|
|||
crop_param.offset_[2] = 0;
|
||||
crop_param.offset_[3] = 0;
|
||||
crop_param.op_parameter_.thread_num_ = 1;
|
||||
crop_param.thread_id_ = 0;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
@ -81,8 +80,7 @@ TEST_F(CropTestFp32, CropTest3) {
|
|||
crop_param.axis_ = 3;
|
||||
crop_param.offset_[0] = 1;
|
||||
crop_param.op_parameter_.thread_num_ = 1;
|
||||
crop_param.thread_id_ = 0;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
@ -102,10 +100,8 @@ TEST_F(CropTestFp32, CropTest4) {
|
|||
crop_param.axis_ = 3;
|
||||
crop_param.offset_[0] = 1;
|
||||
crop_param.op_parameter_.thread_num_ = 2;
|
||||
crop_param.thread_id_ = 0;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
crop_param.thread_id_ = 1;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 1);
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
@ -191,10 +187,8 @@ TEST_F(CropTestFp32, CropTest8) {
|
|||
crop_param.offset_[1] = 1;
|
||||
crop_param.offset_[2] = 1;
|
||||
crop_param.op_parameter_.thread_num_ = 2;
|
||||
crop_param.thread_id_ = 0;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
crop_param.thread_id_ = 1;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 1);
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
@ -219,10 +213,106 @@ TEST_F(CropTestFp32, CropTest9) {
|
|||
crop_param.offset_[1] = 1;
|
||||
crop_param.offset_[2] = 1;
|
||||
crop_param.op_parameter_.thread_num_ = 2;
|
||||
crop_param.thread_id_ = 0;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
crop_param.thread_id_ = 1;
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 1);
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
CompareOutputData(output, expect_out, kOutSize, 0.000001);
|
||||
}
|
||||
|
||||
// CropTest10: calls Crop4D directly with thread_num_ = 2 and runs the two
// thread ids sequentially in reverse order (1, then 0).
// The input is 50 floats described by in_shape {1, 2, 5, 5}; the expected
// output ({1, 2, 2, 2}, 8 values) is the first two values of the first two
// rows of each 25-element plane: {1, 2, 6, 7} and {26, 27, 31, 32}.
TEST_F(CropTestFp32, CropTest10) {
|
||||
float input[50] = {1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50};
|
||||
const int kOutSize = 8;
|
||||
float expect_out[kOutSize] = {1, 2,
|
||||
6, 7,
|
||||
26, 27,
|
||||
31, 32};
|
||||
|
||||
float output[kOutSize];
|
||||
int in_shape[4] = {1, 2, 5, 5};
|
||||
int out_shape[4] = {1, 2, 2, 2};
|
||||
// NOTE(review): crop_param is declared without an initializer; only axis_,
// offset_[0], offset_[1] and op_parameter_.thread_num_ are assigned below.
// Confirm that Crop4D / Pad4DOffset read no other field for this configuration.
CropParameter crop_param;
|
||||
crop_param.axis_ = 2;
|
||||
crop_param.offset_[0] = 0;
|
||||
crop_param.offset_[1] = 0;
|
||||
crop_param.op_parameter_.thread_num_ = 2;
|
||||
// The thread id is passed as an explicit argument. Presumably each call fills
// only its own share of out_shape[1], so the 1-then-0 order should not affect
// the result - TODO confirm against the Crop4D implementation.
Crop4D(input, output, in_shape, out_shape, &crop_param, 1);
|
||||
Crop4D(input, output, in_shape, out_shape, &crop_param, 0);
|
||||
// Print the produced values to stdout for manual inspection.
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
// Check output against expect_out; 0.000001 is presumably the allowed
// per-element error - verify against CompareOutputData.
CompareOutputData(output, expect_out, kOutSize, 0.000001);
|
||||
}
|
||||
|
||||
TEST_F(CropTestFp32, CropTest11) {
|
||||
float input[100] = {1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50,
|
||||
1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50};
|
||||
|
||||
const int kOutSize = 16;
|
||||
float expect_out[kOutSize] = {1, 2,
|
||||
6, 7,
|
||||
26, 27,
|
||||
31, 32,
|
||||
1, 2,
|
||||
6, 7,
|
||||
26, 27,
|
||||
31, 32};
|
||||
std::vector<int> in_shape = {1, 4, 5, 5};
|
||||
std::vector<int> out_shape = {1, 4, 2, 2};
|
||||
std::vector<lite::tensor::Tensor *> inputs;
|
||||
std::vector<lite::tensor::Tensor *> outputs;
|
||||
auto in_t =
|
||||
new lite::tensor::Tensor(kNumberTypeFloat, in_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
|
||||
in_t->MallocData();
|
||||
memcpy(in_t->Data(), input, sizeof(float) * in_t->ElementsNum());
|
||||
inputs.push_back(in_t);
|
||||
|
||||
auto out_t =
|
||||
new lite::tensor::Tensor(kNumberTypeFloat, out_shape, schema::Format_NHWC, static_cast<schema::NodeType>(1));
|
||||
out_t->MallocData();
|
||||
outputs.push_back(out_t);
|
||||
|
||||
auto ctx = new (std::nothrow) lite::Context;
|
||||
ctx->thread_num_ = 2;
|
||||
CropParameter crop_param;
|
||||
crop_param.axis_ = 2;
|
||||
crop_param.offset_[0] = 0;
|
||||
crop_param.offset_[1] = 0;
|
||||
auto kernel = new kernel::CropCPUKernel(reinterpret_cast<OpParameter *>(&crop_param), inputs, outputs, ctx, nullptr);
|
||||
kernel->Init();
|
||||
kernel->Run();
|
||||
|
||||
float *output = reinterpret_cast<float *>(outputs[0]->Data());
|
||||
for (int i = 0; i < kOutSize; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue