forked from mindspore-Ecosystem/mindspore
!19336 [MSLITE][DEVELOP] fix bug of npu+fp16, convert const tensor data from fp16 -> fp32
Merge pull request !19336 from yangruoqi713/npu
This commit is contained in:
commit
a65fca39f7
|
@ -15,8 +15,45 @@
|
|||
*/
|
||||
|
||||
#include "src/delegate/npu/npu_converter_utils.h"
|
||||
#include <arm_neon.h>
|
||||
#include "src/common/log_adapter.h"
|
||||
namespace mindspore {
|
||||
#define C8NUM 8
|
||||
#ifdef ENABLE_ARM64
|
||||
void Float32ToFloat16(const float *__restrict input, float16_t *__restrict output, int number) {
|
||||
int count = (number & ~(C8NUM - 1));
|
||||
int i = 0;
|
||||
for (; i < count; i += C8NUM) {
|
||||
float32x4_t in1 = vld1q_f32(input + i);
|
||||
float16x4_t out1 = vcvt_f16_f32(in1);
|
||||
float32x4_t in2 = vld1q_f32(input + i + 4);
|
||||
float16x4_t out2 = vcvt_f16_f32(in2);
|
||||
float16x8_t out = vcombine_f16(out1, out2);
|
||||
vst1q_f16(output + i, out);
|
||||
}
|
||||
for (; i < number; ++i) {
|
||||
output[i] = static_cast<float16_t>(input[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void Float16ToFloat32(const float16_t *__restrict input, float *__restrict output, int number) {
|
||||
int count = number & ~(C8NUM - 1);
|
||||
int i = 0;
|
||||
for (; i < count; i += C8NUM) {
|
||||
float16x8_t in = vld1q_f16(input + i);
|
||||
float16x4_t in1 = vget_low_f16(in);
|
||||
float16x4_t in2 = vget_high_f16(in);
|
||||
float32x4_t out1 = vcvt_f32_f16(in1);
|
||||
vst1q_f32(output + i, out1);
|
||||
float32x4_t out2 = vcvt_f32_f16(in2);
|
||||
vst1q_f32(output + i + 4, out2);
|
||||
}
|
||||
for (; i < number; ++i) {
|
||||
output[i] = static_cast<float>(input[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
ge::Shape ConverterToNPUShape(const std::vector<int> &src_shape) {
|
||||
vector<int64_t> shapes;
|
||||
shapes.reserve(src_shape.size());
|
||||
|
@ -50,10 +87,8 @@ ge::DataType ConverterToNPUDataType(TypeId type_id) {
|
|||
switch (type_id) {
|
||||
case kNumberTypeFloat:
|
||||
case kNumberTypeFloat32:
|
||||
data_type = ge::DT_FLOAT;
|
||||
break;
|
||||
case kNumberTypeFloat16:
|
||||
data_type = ge::DT_FLOAT16;
|
||||
data_type = ge::DT_FLOAT;
|
||||
break;
|
||||
case kNumberTypeInt8:
|
||||
data_type = ge::DT_INT8;
|
||||
|
@ -93,7 +128,7 @@ std::shared_ptr<ge::Tensor> ConverterToNPUTensor(tensor::MSTensor *src) {
|
|||
std::shared_ptr<ge::Tensor> ge_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
if (ge_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new ge_tensor failed.";
|
||||
return ge_tensor;
|
||||
return nullptr;
|
||||
}
|
||||
ge::TensorDesc tensor_desc(ConverterToNPUShape(src->shape()), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(src->data_type()));
|
||||
|
@ -101,7 +136,20 @@ std::shared_ptr<ge::Tensor> ConverterToNPUTensor(tensor::MSTensor *src) {
|
|||
ge_tensor->SetTensorDesc(tensor_desc);
|
||||
|
||||
if (src->data() != nullptr) {
|
||||
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(src->data()), src->Size());
|
||||
if (src->data_type() == kNumberTypeFloat16) {
|
||||
#ifdef ENABLE_ARM64
|
||||
auto fp32_data = malloc(src->ElementsNum() * sizeof(float));
|
||||
Float16ToFloat32(reinterpret_cast<float16_t *>(src->data()), reinterpret_cast<float *>(fp32_data),
|
||||
src->ElementsNum());
|
||||
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(fp32_data), src->ElementsNum() * sizeof(float));
|
||||
free(fp32_data);
|
||||
#else
|
||||
MS_LOG(ERROR) << "This platform does not support fp16.";
|
||||
return nullptr;
|
||||
#endif
|
||||
} else {
|
||||
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(src->data()), src->Size());
|
||||
}
|
||||
}
|
||||
return ge_tensor;
|
||||
}
|
||||
|
|
|
@ -217,6 +217,17 @@ NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primi
|
|||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto tensor : in_tensors) {
|
||||
if (tensor->data_type() == kNumberTypeFloat16 && tensor->data() == nullptr) {
|
||||
tensor->set_data_type(kNumberTypeFloat32);
|
||||
}
|
||||
}
|
||||
for (auto tensor : out_tensors) {
|
||||
if (tensor->data_type() == kNumberTypeFloat16) {
|
||||
tensor->set_data_type(kNumberTypeFloat32);
|
||||
}
|
||||
}
|
||||
return npu_op;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
|
|||
MS_LOG(ERROR) << "New scale const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto scale_tensor = mindspore::ConverterToNPUTensor(in_tensors[1]);
|
||||
auto scale_tensor = ConverterToNPUTensor(in_tensors[1]);
|
||||
scale->set_attr_value(scale_tensor);
|
||||
batchnorm_->set_input_scale(*scale);
|
||||
|
||||
|
@ -54,7 +54,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
|
|||
MS_LOG(ERROR) << "New offset const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto offset_tensor = mindspore::ConverterToNPUTensor(in_tensors[2]);
|
||||
auto offset_tensor = ConverterToNPUTensor(in_tensors[2]);
|
||||
offset->set_attr_value(offset_tensor);
|
||||
batchnorm_->set_input_offset(*offset);
|
||||
|
||||
|
@ -63,7 +63,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
|
|||
MS_LOG(ERROR) << "New mean const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto mean_tensor = mindspore::ConverterToNPUTensor(in_tensors[3]);
|
||||
auto mean_tensor = ConverterToNPUTensor(in_tensors[3]);
|
||||
mean->set_attr_value(mean_tensor);
|
||||
batchnorm_->set_input_mean(*mean);
|
||||
|
||||
|
@ -72,7 +72,7 @@ int BatchnormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tenso
|
|||
MS_LOG(ERROR) << "New variance const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto variance_tensor = mindspore::ConverterToNPUTensor(in_tensors[4]);
|
||||
auto variance_tensor = ConverterToNPUTensor(in_tensors[4]);
|
||||
variance->set_attr_value(variance_tensor);
|
||||
batchnorm_->set_input_variance(*variance);
|
||||
return RET_OK;
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include "src/delegate/npu/op/convolution_base_npu.h"
|
||||
#include "src/delegate/npu/npu_converter_utils.h"
|
||||
#include "src/delegate/npu/transpose_kernel.h"
|
||||
#include "nnacl/fp16/cast_fp16.h"
|
||||
|
||||
namespace mindspore {
|
||||
ConvolutionBaseNPUOp::~ConvolutionBaseNPUOp() {
|
||||
if (act_ != nullptr) {
|
||||
|
@ -40,13 +42,20 @@ int ConvolutionBaseNPUOp::InitWeightConst(const std::vector<tensor::MSTensor *>
|
|||
return RET_ERROR;
|
||||
}
|
||||
auto w_shape = inputs[1]->shape();
|
||||
auto nhwc_data = inputs[1]->data();
|
||||
auto origin_data = inputs[1]->data();
|
||||
auto fp32_data = origin_data;
|
||||
if (inputs[1]->data_type() == kNumberTypeFloat16) {
|
||||
fp32_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
|
||||
// fp16->fp32
|
||||
Float16ToFloat32(reinterpret_cast<float16_t *>(origin_data), reinterpret_cast<float *>(fp32_data),
|
||||
inputs[1]->ElementsNum());
|
||||
}
|
||||
auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
|
||||
if (nchw_data == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
|
||||
PackNHWCToNCHWFp32(fp32_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
|
||||
|
||||
std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
if (weight_tensor == nullptr) {
|
||||
|
@ -56,7 +65,7 @@ int ConvolutionBaseNPUOp::InitWeightConst(const std::vector<tensor::MSTensor *>
|
|||
ge::TensorDesc tensor_desc(ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(inputs[1]->data_type()));
|
||||
weight_tensor->SetTensorDesc(tensor_desc);
|
||||
weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size());
|
||||
weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->ElementsNum() * sizeof(float));
|
||||
|
||||
weight_->set_attr_value(weight_tensor);
|
||||
free(nchw_data);
|
||||
|
@ -70,15 +79,11 @@ int ConvolutionBaseNPUOp::InitBiasConst(const std::vector<tensor::MSTensor *> &i
|
|||
MS_LOG(ERROR) << "New bias const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
std::shared_ptr<ge::Tensor> bias_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
std::shared_ptr<ge::Tensor> bias_tensor = ConverterToNPUTensor(inputs[2]);
|
||||
if (bias_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new bias_tensor failed.";
|
||||
MS_LOG(ERROR) << "Get bias_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc tensor_desc(ConverterToNPUShape({inputs[2]->shape()[0]}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(inputs[2]->data_type()));
|
||||
bias_tensor->SetTensorDesc(tensor_desc);
|
||||
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data()), inputs[2]->Size());
|
||||
bias_->set_attr_value(bias_tensor);
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
|
@ -139,6 +139,11 @@ NPUOp *GetNPUConvOp(const schema::Primitive *primitive, const std::vector<tensor
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
if (in_tensors[0]->data_type() != kNumberTypeFloat32 && in_tensors[0]->data_type() != kNumberTypeFloat16) {
|
||||
MS_LOG(ERROR) << "Npu does not support datatype " << in_tensors[0]->data_type();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
NPUOp *op = nullptr;
|
||||
auto conv_prim = primitive->value_as_Conv2DFusion();
|
||||
auto group = static_cast<int>(conv_prim->group());
|
||||
|
|
|
@ -65,7 +65,7 @@ int FullconnectionNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_
|
|||
MS_LOG(ERROR) << "New weight const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto weight_tensor = mindspore::ConverterToNPUTensor(in_tensors[1]);
|
||||
auto weight_tensor = ConverterToNPUTensor(in_tensors[1]);
|
||||
weight_->set_attr_value(weight_tensor);
|
||||
fc_->set_input_x2(*weight_).set_attr_transpose_x2(true);
|
||||
|
||||
|
|
|
@ -41,15 +41,13 @@ int InstanceNormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_te
|
|||
instance_norm_->set_input_x(*npu_inputs[0]);
|
||||
|
||||
auto gamma_shape = in_tensors[1]->shape();
|
||||
std::shared_ptr<ge::Tensor> gamma_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
auto gamma_tensor = ConverterToNPUTensor(in_tensors[1]);
|
||||
if (gamma_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new gamma_tensor failed.";
|
||||
MS_LOG(ERROR) << "Get gamma_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc gamma_tensor_desc(ConverterToNPUShape({1, gamma_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(in_tensors[1]->data_type()));
|
||||
gamma_tensor->SetTensorDesc(gamma_tensor_desc);
|
||||
gamma_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[1]->data()), in_tensors[1]->Size());
|
||||
gamma_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, gamma_shape[0], 1, 1})));
|
||||
|
||||
gamma_ = new (std::nothrow) hiai::op::Const(name_ + "_gamma");
|
||||
if (gamma_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New gamma_ const failed.";
|
||||
|
@ -59,15 +57,13 @@ int InstanceNormNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_te
|
|||
instance_norm_->set_input_gamma(*gamma_);
|
||||
|
||||
auto beta_shape = in_tensors[2]->shape();
|
||||
std::shared_ptr<ge::Tensor> beta_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
auto beta_tensor = ConverterToNPUTensor(in_tensors[2]);
|
||||
if (beta_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new beta_tensor failed.";
|
||||
MS_LOG(ERROR) << "Get beta_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc beta_tensor_desc(ConverterToNPUShape({1, beta_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(in_tensors[2]->data_type()));
|
||||
beta_tensor->SetTensorDesc(beta_tensor_desc);
|
||||
beta_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[2]->data()), in_tensors[2]->Size());
|
||||
beta_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, beta_shape[0], 1, 1})));
|
||||
|
||||
beta_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
|
||||
if (beta_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New beta_ const failed.";
|
||||
|
|
|
@ -61,18 +61,18 @@ int MatMulNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tensors,
|
|||
}
|
||||
add_op_->set_input_x1(*matmul_);
|
||||
auto bias_shape = in_tensors[2]->shape();
|
||||
auto bias_tensor = std::make_shared<ge::Tensor>();
|
||||
auto bias_tensor = ConverterToNPUTensor(in_tensors[2]);
|
||||
if (bias_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new bias_tensor failed.";
|
||||
MS_LOG(ERROR) << "Get bias_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc bias_tensor_desc(ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(in_tensors[2]->data_type()));
|
||||
|
||||
ge::TensorDesc bias_tensor_desc(ConverterToNPUShape({1, bias_shape[0], 1, 1}));
|
||||
if (out_tensors[0]->shape().size() == 2) {
|
||||
bias_tensor_desc.SetShape(ConverterToNPUShape({1, bias_shape[0]}));
|
||||
}
|
||||
bias_tensor->SetTensorDesc(bias_tensor_desc);
|
||||
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[2]->data()), in_tensors[2]->Size());
|
||||
|
||||
bias_ = new (std::nothrow) hiai::op::Const(name_ + "_bias");
|
||||
if (bias_ == nullptr) {
|
||||
MS_LOG(ERROR) << "new bias const failed.";
|
||||
|
|
|
@ -126,7 +126,8 @@ NPUOp *GetNPUOp(const schema::Primitive *primitive, const std::vector<tensor::MS
|
|||
std::set<schema::PrimitiveType> int32_lists = {schema::PrimitiveType_Cast, schema::PrimitiveType_StridedSlice};
|
||||
auto support_int32 = in_tensors[0]->data_type() == kNumberTypeInt32 &&
|
||||
find(int32_lists.begin(), int32_lists.end(), primitive->value_type()) != int32_lists.end();
|
||||
if (in_tensors[0]->data_type() != kNumberTypeFloat32 && !support_int32) {
|
||||
if (in_tensors[0]->data_type() != kNumberTypeFloat32 && in_tensors[0]->data_type() != kNumberTypeFloat16 &&
|
||||
!support_int32) {
|
||||
MS_LOG(ERROR) << "Npu does not support datatype " << in_tensors[0]->data_type() << " for op type "
|
||||
<< primitive->value_type();
|
||||
return nullptr;
|
||||
|
|
|
@ -67,16 +67,14 @@ int ScaleNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tensors,
|
|||
const std::vector<ge::Operator *> &npu_inputs) {
|
||||
op_->set_input_x(*npu_inputs.at(0));
|
||||
MS_ASSERT(in_tensors.size() > 1);
|
||||
auto scale_shape = in_tensors.at(1)->shape();
|
||||
std::shared_ptr<ge::Tensor> scale_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
auto scale_shape = in_tensors[1]->shape();
|
||||
auto scale_tensor = ConverterToNPUTensor(in_tensors[1]);
|
||||
if (scale_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new scale_tensor failed.";
|
||||
MS_LOG(ERROR) << "Get scale_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc scale_tensor_desc(ConverterToNPUShape({1, scale_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(in_tensors[1]->data_type()));
|
||||
scale_tensor->SetTensorDesc(scale_tensor_desc);
|
||||
scale_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[1]->data()), in_tensors[1]->Size());
|
||||
scale_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, scale_shape[0], 1, 1})));
|
||||
|
||||
scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale");
|
||||
if (scale_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New scale_ const failed.";
|
||||
|
@ -87,15 +85,13 @@ int ScaleNPUOp::SetNPUInputs(const std::vector<tensor::MSTensor *> &in_tensors,
|
|||
|
||||
if (in_tensors.size() > 2 && in_tensors[2] != nullptr) {
|
||||
auto bias_shape = in_tensors[2]->shape();
|
||||
std::shared_ptr<ge::Tensor> bias_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
auto bias_tensor = ConverterToNPUTensor(in_tensors[2]);
|
||||
if (bias_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new bias_tensor failed.";
|
||||
MS_LOG(ERROR) << "Get bias_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc bias_tensor_desc(ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
ConverterToNPUDataType(in_tensors[2]->data_type()));
|
||||
bias_tensor->SetTensorDesc(bias_tensor_desc);
|
||||
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(in_tensors[2]->data()), in_tensors[2]->Size());
|
||||
scale_tensor->SetTensorDesc(ge::TensorDesc(ConverterToNPUShape({1, bias_shape[0], 1, 1})));
|
||||
|
||||
bias_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
|
||||
if (bias_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New beta_ const failed.";
|
||||
|
|
|
@ -28,6 +28,9 @@ DeConvWinogradFp16CPUKernel::~DeConvWinogradFp16CPUKernel() {
|
|||
}
|
||||
|
||||
void DeConvWinogradFp16CPUKernel::FreeResizeBuf() {
|
||||
if (deconv_param_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < deconv_param_->compute_size_; i++) {
|
||||
DeConvComputeUnit &unit = deconv_param_->compute_units_[i];
|
||||
if (unit.tmp_buffer_ != nullptr) {
|
||||
|
|
Loading…
Reference in New Issue