[MSLITE][DEVELOP] fix bug of npu+fp16, convert const tensor data from fp16 -> fp32

This commit is contained in:
yangruoqi713 2021-07-03 17:42:35 +08:00
parent d2af221cf3
commit 6974cf6b7b
11 changed files with 115 additions and 50 deletions

View File

@ -15,8 +15,45 @@
*/
#include "src/delegate/npu/npu_converter_utils.h"
#include <arm_neon.h>
#include "src/common/log_adapter.h"
namespace mindspore {
#define C8NUM 8
#ifdef ENABLE_ARM64
void Float32ToFloat16(const float *__restrict input, float16_t *__restrict output, int number) {
int count = (number & ~(C8NUM - 1));
int i = 0;
for (; i < count; i += C8NUM) {
float32x4_t in1 = vld1q_f32(input + i);
float16x4_t out1 = vcvt_f16_f32(in1);
float32x4_t in2 = vld1q_f32(input + i + 4);
float16x4_t out2 = vcvt_f16_f32(in2);
float16x8_t out = vcombine_f16(out1, out2);
vst1q_f16(output + i, out);
}
for (; i < number; ++i) {
output[i] = static_cast<float16_t>(input[i]);
}
}
void Float16ToFloat32(const float16_t *__restrict input, float *__restrict output, int number) {
int count = number & ~(C8NUM - 1);
int i = 0;
for (; i < count; i += C8NUM) {
float16x8_t in = vld1q_f16(input + i);
float16x4_t in1 = vget_low_f16(in);
float16x4_t in2 = vget_high_f16(in);
float32x4_t out1 = vcvt_f32_f16(in1);
vst1q_f32(output + i, out1);
float32x4_t out2 = vcvt_f32_f16(in2);
vst1q_f32(output + i + 4, out2);
}
for (; i < number; ++i) {
output[i] = static_cast<float>(input[i]);
}
}
#endif
ge::Shape ConverterToNPUShape(const std::vector<int> &src_shape) {
vector<int64_t> shapes;
shapes.reserve(src_shape.size());
@ -50,10 +87,8 @@ ge::DataType ConverterToNPUDataType(TypeId type_id) {
switch (type_id) {
case kNumberTypeFloat:
case kNumberTypeFloat32:
data_type = ge::DT_FLOAT;
break;
case kNumberTypeFloat16:
data_type = ge::DT_FLOAT16;
data_type = ge::DT_FLOAT;
break;
case kNumberTypeInt8:
data_type = ge::DT_INT8;
@ -93,7 +128,7 @@ std::shared_ptr<ge::Tensor> ConverterToNPUTensor(tensor::MSTensor *src) {
std::shared_ptr<ge::Tensor> ge_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
if (ge_tensor == nullptr) {
MS_LOG(ERROR) << "new ge_tensor failed.";
return ge_tensor;
return nullptr;
}
ge::TensorDesc tensor_desc(ConverterToNPUShape(src->shape()), ge::FORMAT_NCHW,
ConverterToNPUDataType(src->data_type()));
@ -101,7 +136,20 @@ std::shared_ptr<ge::Tensor> ConverterToNPUTensor(tensor::MSTensor *src) {
ge_tensor->SetTensorDesc(tensor_desc);
if (src->data() != nullptr) {
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(src->data()), src->Size());
if (src->data_type() == kNumberTypeFloat16) {
#ifdef ENABLE_ARM64
auto fp32_data = malloc(src->ElementsNum() * sizeof(float));
Float16ToFloat32(reinterpret_cast<float16_t *>(src->data()), reinterpret_cast<float *>(fp32_data),
src->ElementsNum());
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(fp32_data), src->ElementsNum() * sizeof(float));
free(fp32_data);
#else
MS_LOG(ERROR) << "This platform does not support fp16.";
return nullptr;
#endif
} else {
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(src->data()), src->Size());
}
}
return ge_tensor;
}

View File

@ -217,6 +217,17 @@ NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primi
return nullptr;
}
}
for (auto tensor : in_tensors) {
if (tensor->data_type() == kNumberTypeFloat16 && tensor->data() == nullptr) {
tensor->set_data_type(kNumberTypeFloat32);
}
}
for (auto tensor : out_tensors) {
if (tensor->data_type() == kNumberTypeFloat16) {
tensor->set_data_type(kNumberTypeFloat32);
}
}
return npu_op;
}

View File

@ -45,7 +45,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
MS_LOG(ERROR) << "New scale const failed.";
return RET_ERROR;
}
auto scale_tensor = mindspore::ConverterToNPUTensor(in_tensors[1]);
auto scale_tensor = ConverterToNPUTensor(in_tensors[1]);
scale->set_attr_value(scale_tensor);
batchnorm_->set_input_scale(*scale);
@ -54,7 +54,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
MS_LOG(ERROR) << "New offset const failed.";
return RET_ERROR;
}
auto offset_tensor = mindspore::ConverterToNPUTensor(in_tensors[2]);
auto offset_tensor = ConverterToNPUTensor(in_tensors[2]);
offset->set_attr_value(offset_tensor);
batchnorm_->set_input_offset(*offset);
@ -63,7 +63,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
MS_LOG(ERROR) << "New mean const failed.";
return RET_ERROR;
}
auto mean_tensor = mindspore::ConverterToNPUTensor(in_tensors[3]);
auto mean_tensor = ConverterToNPUTensor(in_tensors[3]);
mean->set_attr_value(mean_tensor);
batchnorm_->set_input_mean(*mean);
@ -72,7 +72,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
MS_LOG(ERROR) << "New variance const failed.";
return RET_ERROR;
}
auto variance_tensor = mindspore::ConverterToNPUTensor(in_tensors[4]);
auto variance_tensor = ConverterToNPUTensor(in_tensors[4]);
variance->set_attr_value(variance_tensor);
batchnorm_->set_input_variance(*variance);
return RET_OK;

View File

@ -17,6 +17,8 @@
#include "src/delegate/npu/op/convolution_base_npu.h"
#include "src/delegate/npu/npu_converter_utils.h"
#include "src/delegate/npu/transpose_kernel.h"
#include "nnacl/fp16/cast_fp16.h"
namespace mindspore {
ConvolutionBaseNPUOp::~ConvolutionBaseNPUOp() {
if (act_ != nullptr) {
@ -40,13 +42,20 @@ int ConvolutionBaseNPUOp::InitWeightConst(const std::vector<tensor::MSTensor *>
return RET_ERROR;
}
auto w_shape = inputs[1]->shape();
auto nhwc_data = inputs[1]->data();
auto origin_data = inputs[1]->data();
auto fp32_data = origin_data;
if (inputs[1]->data_type() == kNumberTypeFloat16) {
fp32_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
// fp16->fp32
Float16ToFloat32(reinterpret_cast<float16_t *>(origin_data), reinterpret_cast<float *>(fp32_data),
inputs[1]->ElementsNum());
}
auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
if (nchw_data == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
PackNHWCToNCHWFp32(fp32_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
if (weight_tensor == nullptr) {
@ -56,7 +65,7 @@ int ConvolutionBaseNPUOp::InitWeightConst(const std::vector<tensor::MSTensor *>
ge::TensorDesc tensor_desc(ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}), ge::FORMAT_NCHW,
ConverterToNPUDataType(inputs[1]->data_type()));
weight_tensor->SetTensorDesc(tensor_desc);
weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size());
weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->ElementsNum() * sizeof(float));
weight_->set_attr_value(weight_tensor);
free(nchw_data);
@ -70,15 +79,11 @@ int ConvolutionBaseNPUOp::InitBiasConst(const std::vector<tensor::MSTensor *> &i
MS_LOG(ERROR) << "New bias const failed.";
return RET_ERROR;
}
std::shared_ptr<ge::Tensor> bias_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
std::shared_ptr<ge::Tensor> bias_tensor = ConverterToNPUTensor(inputs[2]);
if (bias_tensor == nullptr) {
MS_LOG(ERROR) << "new bias_tensor failed.";
MS_LOG(ERROR) << "Get bias_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc tensor_desc(ConverterToNPUShape({inputs[2]->shape()[0]}), ge::FORMAT_NCHW,
ConverterToNPUDataType(inputs[2]->data_type()));
bias_tensor->SetTensorDesc(tensor_desc);
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data()), inputs[2]->Size());
bias_->set_attr_value(bias_tensor);
}
return RET_OK;

View File

@ -139,6 +139,11 @@ NPUOp *GetNPUConvOp(const schema::Primitive *primitive, const std::vector<tensor
return nullptr;
}
if (in_tensors[0]->data_type() != kNumberTypeFloat32 && in_tensors[0]->data_type() != kNumberTypeFloat16) {
MS_LOG(ERROR) << "Npu does not support datatype " << in_tensors[0]->data_type();
return nullptr;
}
NPUOp *op = nullptr;
auto conv_prim = primitive->value_as_Conv2DFusion();
auto group = static_cast<int>(conv_prim->group());

View File

@ -65,7 +65,7 @@ int FullconnectionNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_
MS_LOG(ERROR) << "New weight const failed.";
return RET_ERROR;
}
auto weight_tensor = mindspore::ConverterToNPUTensor(in_tensors[1]);
auto weight_tensor = ConverterToNPUTensor(in_tensors[1]);
weight_->set_attr_value(weight_tensor);
fc_->set_input_x2(*weight_).set_attr_transpose_x2(true);

View File

@ -41,15 +41,13 @@ int InstanceNormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_te
instance_norm_->set_input_x(*npu_inputs[0]);
auto gamma_shape = in_tensors[1]->shape();
std::shared_ptr<ge::Tensor> gamma_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
auto gamma_tensor = ConverterToNPUTensor(in_tensors[1]);
if (gamma_tensor == nullptr) {
MS_LOG(ERROR) << "new gamma_tensor failed.";
MS_LOG(ERROR) << "Get gamma_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc gamma_tensor_desc(ConverterToNPUShape({1, gamma_shape[0], 1, 1}), ge::FORMAT_NCHW,
ConverterToNPUDataType(in_tensors[1]->data_type()));
gamma_tensor->SetTensorDesc(gamma_tensor_desc);
gamma_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[1]->data()), in_tensors[1]->Size());
gamma_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, gamma_shape[0], 1, 1})));
gamma_ = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
if (gamma_ == nullptr) {
MS_LOG(ERROR) << "New gamma_ const failed.";
@ -59,15 +57,13 @@ int InstanceNormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_te
instance_norm_->set_input_gamma(*gamma_);
auto beta_shape = in_tensors[2]->shape();
std::shared_ptr<ge::Tensor> beta_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
auto beta_tensor = ConverterToNPUTensor(in_tensors[2]);
if (beta_tensor == nullptr) {
MS_LOG(ERROR) << "new beta_tensor failed.";
MS_LOG(ERROR) << "Get beta_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc beta_tensor_desc(ConverterToNPUShape({1, beta_shape[0], 1, 1}), ge::FORMAT_NCHW,
ConverterToNPUDataType(in_tensors[2]->data_type()));
beta_tensor->SetTensorDesc(beta_tensor_desc);
beta_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[2]->data()), in_tensors[2]->Size());
beta_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, beta_shape[0], 1, 1})));
beta_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
if (beta_ == nullptr) {
MS_LOG(ERROR) << "New beta_ const failed.";

View File

@ -61,18 +61,18 @@ int MatMulNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tensors,
}
add_op_->set_input_x1(*matmul_);
auto bias_shape = in_tensors[2]->shape();
auto bias_tensor = std::make_shared<ge::Tensor>();
auto bias_tensor = ConverterToNPUTensor(in_tensors[2]);
if (bias_tensor == nullptr) {
MS_LOG(ERROR) << "new bias_tensor failed.";
MS_LOG(ERROR) << "Get bias_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc bias_tensor_desc(ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW,
ConverterToNPUDataType(in_tensors[2]->data_type()));
ge::TensorDesc bias_tensor_desc(ConverterToNPUShape({1, bias_shape[0], 1, 1}));
if (out_tensors[0]->shape().size() == 2) {
bias_tensor_desc.SetShape(ConverterToNPUShape({1, bias_shape[0]}));
}
bias_tensor->SetTensorDesc(bias_tensor_desc);
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[2]->data()), in_tensors[2]->Size());
bias_ = new (std::nothrow) hiai::op::Const(name_ + "_bias");
if (bias_ == nullptr) {
MS_LOG(ERROR) << "new bias const failed.";

View File

@ -126,7 +126,8 @@ NPUOp *GetNPUOp(const schema::Primitive *primitive, const std::vector<tensor::MS
std::set<schema::PrimitiveType> int32_lists = {schema::PrimitiveType_Cast, schema::PrimitiveType_StridedSlice};
auto support_int32 = in_tensors[0]->data_type() == kNumberTypeInt32 &&
find(int32_lists.begin(), int32_lists.end(), primitive->value_type()) != int32_lists.end();
if (in_tensors[0]->data_type() != kNumberTypeFloat32 && !support_int32) {
if (in_tensors[0]->data_type() != kNumberTypeFloat32 && in_tensors[0]->data_type() != kNumberTypeFloat16 &&
!support_int32) {
MS_LOG(ERROR) << "Npu does not support datatype " << in_tensors[0]->data_type() << " for op type "
<< primitive->value_type();
return nullptr;

View File

@ -67,16 +67,14 @@ int ScaleNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
op_->set_input_x(*npu_inputs.at(0));
MS_ASSERT(in_tensors.size() > 1);
auto scale_shape = in_tensors.at(1)->shape();
std::shared_ptr<ge::Tensor> scale_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
auto scale_shape = in_tensors[1]->shape();
auto scale_tensor = ConverterToNPUTensor(in_tensors[1]);
if (scale_tensor == nullptr) {
MS_LOG(ERROR) << "new scale_tensor failed.";
MS_LOG(ERROR) << "Get scale_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc scale_tensor_desc(ConverterToNPUShape({1, scale_shape[0], 1, 1}), ge::FORMAT_NCHW,
ConverterToNPUDataType(in_tensors[1]->data_type()));
scale_tensor->SetTensorDesc(scale_tensor_desc);
scale_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[1]->data()), in_tensors[1]->Size());
scale_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, scale_shape[0], 1, 1})));
scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale");
if (scale_ == nullptr) {
MS_LOG(ERROR) << "New scale_ const failed.";
@ -87,15 +85,13 @@ int ScaleNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tensors,
if (in_tensors.size() > 2 && in_tensors[2] != nullptr) {
auto bias_shape = in_tensors[2]->shape();
std::shared_ptr<ge::Tensor> bias_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
auto bias_tensor = ConverterToNPUTensor(in_tensors[2]);
if (bias_tensor == nullptr) {
MS_LOG(ERROR) << "new bias_tensor failed.";
MS_LOG(ERROR) << "Get bias_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc bias_tensor_desc(ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW,
ConverterToNPUDataType(in_tensors[2]->data_type()));
bias_tensor->SetTensorDesc(bias_tensor_desc);
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[2]->data()), in_tensors[2]->Size());
scale_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, bias_shape[0], 1, 1})));
bias_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
if (bias_ == nullptr) {
MS_LOG(ERROR) << "New beta_ const failed.";

View File

@ -28,6 +28,9 @@ DeConvWinogradFp16CPUKernel::~DeConvWinogradFp16CPUKernel() {
}
void DeConvWinogradFp16CPUKernel::FreeResizeBuf() {
if (deconv_param_ == nullptr) {
return;
}
for (int i = 0; i < deconv_param_->compute_size_; i++) {
DeConvComputeUnit &unit = deconv_param_->compute_units_[i];
if (unit.tmp_buffer_ != nullptr) {