forked from mindspore-Ecosystem/mindspore
[MS][LITE][Develop]Fp16 conv1x1 bug
This commit is contained in:
parent
6d0bbb36a3
commit
0fac817a2d
|
@ -15,14 +15,28 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "nnacl/fp16/matmul_fp16.h"
|
#include "nnacl/fp16/matmul_fp16.h"
|
||||||
void ColMajor2Row8MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col) {
|
|
||||||
for (int r = 0; r < row; r++) {
|
void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16) {
|
||||||
for (int c = 0; c < col; c++) {
|
if (src_float16) {
|
||||||
int cd8 = c / 8;
|
float16_t *src = (float16_t *)src_ptr;
|
||||||
int cm8 = c % 8;
|
for (int r = 0; r < row; r++) {
|
||||||
dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src_ptr[c * row + r];
|
for (int c = 0; c < col; c++) {
|
||||||
|
int cd8 = c / 8;
|
||||||
|
int cm8 = c % 8;
|
||||||
|
dst_ptr[cd8 * 8 * row + r * 8 + cm8] = (float16_t)(src[c * row + r]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
float *src = (float *)src_ptr;
|
||||||
|
for (int r = 0; r < row; r++) {
|
||||||
|
for (int c = 0; c < col; c++) {
|
||||||
|
int cd8 = c / 8;
|
||||||
|
int cm8 = c % 8;
|
||||||
|
dst_ptr[cd8 * 8 * row + r * 8 + cm8] = (float16_t)(src[c * row + r]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void MatMul16x8(const float16_t *a, const float16_t *b, float16_t *dst, const float16_t *bias, ActType act_type,
|
void MatMul16x8(const float16_t *a, const float16_t *b, float16_t *dst, const float16_t *bias, ActType act_type,
|
||||||
|
|
|
@ -32,7 +32,7 @@ extern "C" {
|
||||||
void MatMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, ActType act_type,
|
void MatMulFp16(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, ActType act_type,
|
||||||
int depth, int row, int col, int stride, bool write_nhwc);
|
int depth, int row, int col, int stride, bool write_nhwc);
|
||||||
|
|
||||||
void ColMajor2Row8MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col);
|
void ColMajor2Row8MajorFp16(void *src_ptr, float16_t *dst_ptr, size_t row, size_t col, bool src_float16);
|
||||||
|
|
||||||
void RowMajor2Col16MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col);
|
void RowMajor2Col16MajorFp16(float16_t *src_ptr, float16_t *dst_ptr, size_t row, size_t col);
|
||||||
|
|
||||||
|
|
|
@ -74,31 +74,36 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int Convolution1x1FP16CPUKernel::InitWeightBias() {
|
int Convolution1x1FP16CPUKernel::InitWeightBias() {
|
||||||
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter();
|
auto bias_tensor = in_tensors_.at(kBiasIndex);
|
||||||
if (ret != RET_OK) {
|
auto weight_tensor = in_tensors_.at(kWeightIndex);
|
||||||
MS_LOG(ERROR) << "Get Execute filter failed.";
|
auto input_channel = weight_tensor->Channel();
|
||||||
return ret;
|
auto output_channel = weight_tensor->Batch();
|
||||||
}
|
|
||||||
|
|
||||||
bias_data_ = malloc(matmul_param_->col_8_ * sizeof(float16_t));
|
size_t size = UP_ROUND(output_channel, C8NUM) * sizeof(float16_t);
|
||||||
|
bias_data_ = malloc(size);
|
||||||
if (bias_data_ == nullptr) {
|
if (bias_data_ == nullptr) {
|
||||||
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
|
MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
memset(bias_data_, 0, matmul_param_->col_8_ * sizeof(float16_t));
|
memset(bias_data_, 0, size);
|
||||||
if (in_tensors_.size() == 3) {
|
if (in_tensors_.size() == 3) {
|
||||||
Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->Data()), reinterpret_cast<float16_t *>(bias_data_),
|
if (bias_tensor->data_type() == kNumberTypeFloat16) {
|
||||||
conv_param_->output_channel_);
|
memcpy(bias_data_, bias_tensor->Data(), output_channel * sizeof(float16_t));
|
||||||
|
} else {
|
||||||
|
Float32ToFloat16(reinterpret_cast<float *>(bias_tensor->Data()), reinterpret_cast<float16_t *>(bias_data_),
|
||||||
|
output_channel);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
weight_ptr_ = reinterpret_cast<float16_t *>(malloc(matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t)));
|
size = input_channel * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t);
|
||||||
|
weight_ptr_ = reinterpret_cast<float16_t *>(malloc(size));
|
||||||
if (weight_ptr_ == nullptr) {
|
if (weight_ptr_ == nullptr) {
|
||||||
MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
|
MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
memset(weight_ptr_, 0, matmul_param_->deep_ * matmul_param_->col_8_ * sizeof(float16_t));
|
memset(weight_ptr_, 0, size);
|
||||||
ColMajor2Row8MajorFp16(reinterpret_cast<float16_t *>(execute_weight_), weight_ptr_, matmul_param_->deep_,
|
ColMajor2Row8MajorFp16(weight_tensor->Data(), weight_ptr_, input_channel, output_channel,
|
||||||
matmul_param_->col_);
|
weight_tensor->data_type() == kNumberTypeFloat16);
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,6 +111,13 @@ int Convolution1x1FP16CPUKernel::Init() {
|
||||||
if (!InferShapeDone()) {
|
if (!InferShapeDone()) {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
matmul_param_ = new (std::nothrow) MatMulParameter();
|
||||||
|
if (matmul_param_ == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "Init matmul_param_ failed.";
|
||||||
|
return RET_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
int ret = InitWeightBias();
|
int ret = InitWeightBias();
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Init weight bias failed.";
|
MS_LOG(ERROR) << "Init weight bias failed.";
|
||||||
|
|
|
@ -31,9 +31,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
|
||||||
Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
|
Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
|
||||||
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
|
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
|
||||||
const mindspore::lite::PrimitiveC *primitive)
|
const mindspore::lite::PrimitiveC *primitive)
|
||||||
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
: ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||||
matmul_param_ = new MatMulParameter();
|
|
||||||
}
|
|
||||||
~Convolution1x1FP16CPUKernel() override;
|
~Convolution1x1FP16CPUKernel() override;
|
||||||
|
|
||||||
int Init() override;
|
int Init() override;
|
||||||
|
@ -50,7 +48,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool pre_trans_input_ = false;
|
bool pre_trans_input_ = false;
|
||||||
int thread_count_ = 0;
|
int thread_count_ = 1;
|
||||||
int thread_stride_ = 0;
|
int thread_stride_ = 0;
|
||||||
float16_t *weight_ptr_ = nullptr;
|
float16_t *weight_ptr_ = nullptr;
|
||||||
float16_t *input_ptr_ = nullptr;
|
float16_t *input_ptr_ = nullptr;
|
||||||
|
|
|
@ -23,6 +23,14 @@
|
||||||
#include "src/runtime/runtime_api.h"
|
#include "src/runtime/runtime_api.h"
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
|
|
||||||
|
ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() {
|
||||||
|
if (fp16_weight_ != nullptr) {
|
||||||
|
free(fp16_weight_);
|
||||||
|
fp16_weight_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
|
int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
|
||||||
// ===================input====================//
|
// ===================input====================//
|
||||||
auto input_tensor = in_tensors_.at(kInputIndex);
|
auto input_tensor = in_tensors_.at(kInputIndex);
|
||||||
|
@ -65,6 +73,7 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() {
|
||||||
} else {
|
} else {
|
||||||
auto *origin_weight = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->Data());
|
auto *origin_weight = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->Data());
|
||||||
execute_weight_ = origin_weight;
|
execute_weight_ = origin_weight;
|
||||||
|
fp16_weight_ = nullptr;
|
||||||
}
|
}
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
|
||||||
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
|
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
|
||||||
const mindspore::lite::PrimitiveC *primitive)
|
const mindspore::lite::PrimitiveC *primitive)
|
||||||
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||||
~ConvolutionBaseFP16CPUKernel() override = default;
|
~ConvolutionBaseFP16CPUKernel() override;
|
||||||
|
|
||||||
int Init() override { return RET_OK; }
|
int Init() override { return RET_OK; }
|
||||||
int ReSize() override { return RET_OK; }
|
int ReSize() override { return RET_OK; }
|
||||||
|
|
|
@ -244,8 +244,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
|
||||||
if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
|
if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
|
||||||
kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||||
} else if (kernel_h == 1 && kernel_w == 1) {
|
} else if (kernel_h == 1 && kernel_w == 1) {
|
||||||
// kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||||
kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
|
||||||
} else {
|
} else {
|
||||||
bool use_winograd = false;
|
bool use_winograd = false;
|
||||||
int out_unit;
|
int out_unit;
|
||||||
|
|
Loading…
Reference in New Issue