!4401 add pad int8 multi thread

Merge pull request !4401 from zhaozhenlong/lite/op/pad_int8_threads

Commit d3ef9f77c5
@@ -111,18 +111,35 @@ int PadInt8CPUKernel::Init() {
   return RET_OK;
 }
 
+int PadInt8CPUKernel::RunImpl(int task_id) {
+  return PadConstant4D(in_data_, out_data_, in_dims_, out_dims_, pad_param_->paddings_, task_id, context_->thread_num_);
+}
+
+int PadInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto resize = reinterpret_cast<PadInt8CPUKernel *>(cdata);
+  auto error_code = resize->RunImpl(task_id);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Resize Run error task_id[" << task_id << "] error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
 int PadInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare failed.";
     return RET_ERROR;
   }
-  int8_t *in_data = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
-  int8_t *out_data = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
+  in_data_ = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
+  out_data_ = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
 
-  memset(out_data, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
-  PadConstant4D(in_data, out_data, in_dims_, out_dims_, pad_param_->paddings_);
+  memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
+  int error_code = LiteBackendParallelLaunch(PadInt8Impl, this, context_->thread_num_);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
 }  // namespace mindspore::kernel
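Note on the threading model: Run() caches the tensor pointers in in_data_ / out_data_ (RunImpl only receives a task_id), memsets the whole output to the pad constant once, and then LiteBackendParallelLaunch invokes PadInt8Impl for every task_id in [0, context_->thread_num_). Each task copies a disjoint, strided subset of input rows, so the tasks need no synchronization. A minimal standalone sketch of that pattern, using std::thread purely for illustration (PadRowsTask and the shapes below are made up; the real per-task work is PadConstant4D and the real dispatch goes through the lite thread pool):

// Illustration only -- emulates the task-splitting dispatch with std::thread.
#include <cstdint>
#include <cstring>
#include <thread>
#include <vector>

// Hypothetical stand-in for the per-task body of PadConstant4D:
// task `task_id` copies rows task_id, task_id + thread_num, ... into the
// already pad-filled output, so writes from different tasks never overlap.
void PadRowsTask(const int8_t *in, int8_t *out, int rows, int row_len,
                 int pad_top, int pad_left, int out_row_len,
                 int task_id, int thread_num) {
  for (int h = task_id; h < rows; h += thread_num) {
    std::memcpy(out + (h + pad_top) * out_row_len + pad_left,
                in + h * row_len, row_len);
  }
}

int main() {
  const int rows = 4, row_len = 3, pad_top = 1, pad_left = 1;
  const int out_row_len = row_len + 2;                   // one pad column per side
  std::vector<int8_t> in(rows * row_len, 7);             // interior values
  std::vector<int8_t> out((rows + 2) * out_row_len, 0);  // pre-filled pad constant (the memset in Run())

  const int thread_num = 2;
  std::vector<std::thread> workers;
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    workers.emplace_back(PadRowsTask, in.data(), out.data(), rows, row_len,
                         pad_top, pad_left, out_row_len, task_id, thread_num);
  }
  for (auto &t : workers) t.join();
  return 0;
}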
@@ -38,6 +38,7 @@ class PadInt8CPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int RunImpl(int task_id);
 
  private:
   int SetQuantParam();
@@ -46,6 +47,8 @@ class PadInt8CPUKernel : public LiteKernel {
 
  private:
   PadParameter *pad_param_;
+  int8_t *in_data_;
+  int8_t *out_data_;
   int in_dims_[DEFAULT_PAD_NDIMS];
   int out_dims_[DEFAULT_PAD_NDIMS];
 };
@@ -16,12 +16,13 @@
 
 #include "nnacl/int8/pad.h"
 #include "nnacl/common_func.h"
+#include "nnacl/errorcode.h"
 
-void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
-                   const int32_t *paddings) {
+int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
+                  const int32_t *paddings, const int tid, const int thread_num) {
   int32_t copy_size = in_dims[3];
   for (int n = 0; n < in_dims[0]; n++) {
-    for (int h = 0; h < in_dims[1]; h++) {
+    for (int h = tid; h < in_dims[1]; h += thread_num) {
       for (int w = 0; w < in_dims[2]; w++) {
         const int8_t *in = in_data + offset(in_dims, n, h, w, 0);
         int8_t *out = out_data + offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
@@ -29,5 +30,5 @@ void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_di
       }
     }
   }
-  return;
+  return NNACL_OK;
 }
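The functional change in PadConstant4D is only the h loop: thread tid now handles rows tid, tid + thread_num, tid + 2 * thread_num, and so on. With in_dims[1] = 5 and thread_num = 2, for instance, tid 0 copies rows 0, 2, 4 and tid 1 copies rows 1, 3, so every row is copied exactly once and no two threads touch the same row. A tiny sketch that prints this partition (illustration only):

#include <cstdio>

int main() {
  const int rows = 5, thread_num = 2;
  for (int tid = 0; tid < thread_num; ++tid) {
    std::printf("tid %d:", tid);
    for (int h = tid; h < rows; h += thread_num) {
      std::printf(" %d", h);  // rows handled by this task
    }
    std::printf("\n");        // prints "tid 0: 0 2 4" and "tid 1: 1 3"
  }
  return 0;
}

Returning an int (NNACL_OK) instead of void lets the per-task wrapper PadInt8Impl propagate a status code back through the parallel launch.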
@@ -24,8 +24,8 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
-                   const int32_t *paddings);
+int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
+                  const int32_t *paddings, const int tid, const int thread_num);
 #ifdef __cplusplus
 }
 #endif
@@ -183,6 +183,7 @@ TEST_F(TestPadInt8, PadInt8TestInit4) {
   std::vector<lite::tensor::Tensor *> outputs_;
   auto pad_param = new PadParameter();
   lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
   int8_t *correct;
   int total_size = PadInt8TestInit2(&inputs_, &outputs_, pad_param, &correct);
   kernel::PadInt8CPUKernel *pad =