forked from mindspore-Ecosystem/mindspore
!20908 fix some defects of codegen
Merge pull request !20908 from zhanyuan/codex_main
This commit is contained in:
commit
8257b469f5
|
@ -558,8 +558,8 @@ void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, Dat
|
||||||
}
|
}
|
||||||
|
|
||||||
// dst: bias + depth*input_zp*weight_zp - input_zp*weight_col_sums
|
// dst: bias + depth*input_zp*weight_zp - input_zp*weight_col_sums
|
||||||
void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int *weight_zp_ptr, const int *bias, int *dst,
|
void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, const int *weight_zp_ptr, const int *bias,
|
||||||
DataOrder order, bool filter_per_channel) {
|
int *dst, DataOrder order, bool filter_per_channel) {
|
||||||
for (int c = 0; c < col; ++c) {
|
for (int c = 0; c < col; ++c) {
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (int r = 0; r < row; ++r) {
|
for (int r = 0; r < row; ++r) {
|
||||||
|
|
|
@ -31,8 +31,8 @@ void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int *dst, int row_4, int
|
||||||
void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
|
void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
|
||||||
void RowMajor2Col16x4MajorInt8(int8_t *src, int row, int col, int8_t *dst);
|
void RowMajor2Col16x4MajorInt8(int8_t *src, int row, int col, int8_t *dst);
|
||||||
void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, DataOrder order);
|
void CalcInputSums(int8_t *input, int row, int col, int weight_zp, int *dst, DataOrder order);
|
||||||
void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, int *weight_zp_ptr, const int *bias, int *dst,
|
void CalcWeightBiasSums(int8_t *weight, int row, int col, int input_zp, const int *weight_zp_ptr, const int *bias,
|
||||||
DataOrder order, bool filter_per_channel);
|
int *dst, DataOrder order, bool filter_per_channel);
|
||||||
void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16, const int *a_sums,
|
void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16, const int *a_sums,
|
||||||
const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier,
|
const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier,
|
||||||
const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc,
|
const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc,
|
||||||
|
|
|
@ -188,14 +188,22 @@ void CodeGraphQuantArgsState(std::ofstream &ofs) {
|
||||||
|
|
||||||
void CodeGraphQuantArgsImplement(std::ofstream &ofs, const std::unique_ptr<CoderContext> &ctx) {
|
void CodeGraphQuantArgsImplement(std::ofstream &ofs, const std::unique_ptr<CoderContext> &ctx) {
|
||||||
std::vector<Tensor *> graph_inputs = ctx->graph_inputs();
|
std::vector<Tensor *> graph_inputs = ctx->graph_inputs();
|
||||||
|
if (graph_inputs.empty()) {
|
||||||
|
MS_LOG(ERROR) << "graph input tensors' number is 0";
|
||||||
|
return;
|
||||||
|
}
|
||||||
Tensor *in_tensor = graph_inputs.at(kInputIndex);
|
Tensor *in_tensor = graph_inputs.at(kInputIndex);
|
||||||
MS_CHECK_PTR_IF_NULL(in_tensor);
|
MS_CHECK_PTR_IF_NULL(in_tensor);
|
||||||
std::vector<Tensor *> graph_outputs = ctx->graph_outputs();
|
std::vector<Tensor *> graph_outputs = ctx->graph_outputs();
|
||||||
|
if (graph_outputs.empty()) {
|
||||||
|
MS_LOG(ERROR) << "graph output tensors' number is 0";
|
||||||
|
return;
|
||||||
|
}
|
||||||
Tensor *out_tensor = graph_outputs.at(kOutputIndex);
|
Tensor *out_tensor = graph_outputs.at(kOutputIndex);
|
||||||
MS_CHECK_PTR_IF_NULL(out_tensor);
|
MS_CHECK_PTR_IF_NULL(out_tensor);
|
||||||
std::vector<QuantArg> in_quant_args = in_tensor->quant_params();
|
std::vector<QuantArg> in_quant_args = in_tensor->quant_params();
|
||||||
std::vector<QuantArg> out_quant_args = out_tensor->quant_params();
|
std::vector<QuantArg> out_quant_args = out_tensor->quant_params();
|
||||||
if (graph_inputs.empty() || graph_outputs.empty() || in_quant_args.empty() || out_quant_args.empty()) {
|
if (in_quant_args.empty() || out_quant_args.empty()) {
|
||||||
MS_LOG(ERROR) << "code model quant args failed";
|
MS_LOG(ERROR) << "code model quant args failed";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -150,11 +150,13 @@ int Conv2DInt8Coder::InitTmpBuffer() {
|
||||||
switch (opt_) {
|
switch (opt_) {
|
||||||
case Basic:
|
case Basic:
|
||||||
buffer_size_ =
|
buffer_size_ =
|
||||||
(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * (int32_t)sizeof(int16_t);
|
static_cast<size_t>(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) *
|
||||||
|
sizeof(int16_t);
|
||||||
break;
|
break;
|
||||||
case Convolve_1_x_n:
|
case Convolve_1_x_n:
|
||||||
buffer_size_ =
|
buffer_size_ =
|
||||||
(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * sizeof(int16_t);
|
static_cast<size_t>(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) *
|
||||||
|
sizeof(int16_t);
|
||||||
break;
|
break;
|
||||||
case Convolve_1x1_fast:
|
case Convolve_1x1_fast:
|
||||||
// do nothing
|
// do nothing
|
||||||
|
|
|
@ -63,7 +63,7 @@ class Conv2DInt8Coder final : public Conv2DBaseCoder {
|
||||||
uint16_t output_y_{0};
|
uint16_t output_y_{0};
|
||||||
|
|
||||||
int16_t *buffer_{nullptr};
|
int16_t *buffer_{nullptr};
|
||||||
int32_t buffer_size_{0};
|
size_t buffer_size_{0};
|
||||||
|
|
||||||
ConvOpt opt_{ConvOpt::Basic};
|
ConvOpt opt_{ConvOpt::Basic};
|
||||||
};
|
};
|
||||||
|
|
|
@ -38,6 +38,7 @@ int ConvDelegateCoder::Prepare(CoderContext *const context) {
|
||||||
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node_->primitive_), schema_version);
|
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node_->primitive_), schema_version);
|
||||||
MS_CHECK_PTR(parameter_gen);
|
MS_CHECK_PTR(parameter_gen);
|
||||||
OpParameter *op_parameter = parameter_gen(node_->primitive_);
|
OpParameter *op_parameter = parameter_gen(node_->primitive_);
|
||||||
|
MS_CHECK_PTR(op_parameter);
|
||||||
op_parameter->thread_num_ = thread_num_;
|
op_parameter->thread_num_ = thread_num_;
|
||||||
conv_coder_->set_type(primitive_type);
|
conv_coder_->set_type(primitive_type);
|
||||||
conv_coder_->set_thread_num(thread_num_);
|
conv_coder_->set_thread_num(thread_num_);
|
||||||
|
@ -70,11 +71,9 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderSelect(const std::vector<T
|
||||||
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
|
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
|
||||||
ParameterGen paramGen =
|
ParameterGen paramGen =
|
||||||
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
|
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
|
||||||
if (paramGen == nullptr) {
|
MS_CHECK_PTR_RET_NULL(paramGen);
|
||||||
MS_LOG(ERROR) << "parameter generator is null";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
|
auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
|
||||||
|
MS_CHECK_PTR_RET_NULL(conv_param);
|
||||||
int kernel_h = conv_param->kernel_h_;
|
int kernel_h = conv_param->kernel_h_;
|
||||||
int kernel_w = conv_param->kernel_w_;
|
int kernel_w = conv_param->kernel_w_;
|
||||||
conv_param->input_h_ = in_tensors.at(kInputIndex)->Height();
|
conv_param->input_h_ = in_tensors.at(kInputIndex)->Height();
|
||||||
|
|
|
@ -63,7 +63,9 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) {
|
||||||
auto in_shape = input_tensors_.at(i)->shape();
|
auto in_shape = input_tensors_.at(i)->shape();
|
||||||
concat_param_->input_shapes_[i] = reinterpret_cast<int *>(malloc(in_shape.size() * sizeof(int)));
|
concat_param_->input_shapes_[i] = reinterpret_cast<int *>(malloc(in_shape.size() * sizeof(int)));
|
||||||
MS_CHECK_PTR(concat_param_->input_shapes_[i]);
|
MS_CHECK_PTR(concat_param_->input_shapes_[i]);
|
||||||
memcpy(reinterpret_cast<void *>(concat_param_->input_shapes_[i]), in_shape.data(), sizeof(int) * in_shape.size());
|
MS_CHECK_RET_CODE(memcpy_s(reinterpret_cast<void *>(concat_param_->input_shapes_[i]), sizeof(int) * in_shape.size(),
|
||||||
|
in_shape.data(), sizeof(int) * in_shape.size()),
|
||||||
|
"memcpy_s failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
before_axis_size = 1;
|
before_axis_size = 1;
|
||||||
|
@ -75,8 +77,9 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) {
|
||||||
int output_dim = static_cast<int>(output_tensor_->shape().size());
|
int output_dim = static_cast<int>(output_tensor_->shape().size());
|
||||||
concat_param_->output_shapes_ = reinterpret_cast<int *>(malloc(output_dim * sizeof(int)));
|
concat_param_->output_shapes_ = reinterpret_cast<int *>(malloc(output_dim * sizeof(int)));
|
||||||
MS_CHECK_PTR(concat_param_->output_shapes_);
|
MS_CHECK_PTR(concat_param_->output_shapes_);
|
||||||
memcpy_s(reinterpret_cast<void *>(concat_param_->output_shapes_), output_dim * sizeof(int),
|
MS_CHECK_RET_CODE(memcpy_s(reinterpret_cast<void *>(concat_param_->output_shapes_), output_dim * sizeof(int),
|
||||||
output_tensor_->shape().data(), sizeof(int) * output_dim);
|
output_tensor_->shape().data(), sizeof(int) * output_dim),
|
||||||
|
"memcpy_s failed");
|
||||||
for (int i = axis_ + 1; i < output_dim; i++) {
|
for (int i = axis_ + 1; i < output_dim; i++) {
|
||||||
after_axis_size *= concat_param_->output_shapes_[i];
|
after_axis_size *= concat_param_->output_shapes_[i];
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
#include "wrapper/fp32/matmul_fp32_wrapper.h"
|
#include "wrapper/fp32/matmul_fp32_wrapper.h"
|
||||||
void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a) {
|
void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a) {
|
||||||
if (is_vector_a) {
|
if (is_vector_a) {
|
||||||
memcpy(dst_ptr, src_ptr, params_->batch * params_->deep_ * sizeof(float));
|
memcpy(dst_ptr, src_ptr, (size_t)(params_->batch * params_->deep_) * sizeof(float));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < params_->batch; i++) {
|
for (int i = 0; i < params_->batch; i++) {
|
||||||
|
@ -34,7 +34,7 @@ void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa
|
||||||
void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a) {
|
void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a) {
|
||||||
if (is_vector_a) {
|
if (is_vector_a) {
|
||||||
if (params_->b_transpose_) {
|
if (params_->b_transpose_) {
|
||||||
memcpy(dst_ptr, src_ptr, params_->batch * params_->col_ * params_->deep_ * sizeof(float));
|
memcpy(dst_ptr, src_ptr, (size_t)(params_->batch * params_->col_ * params_->deep_) * sizeof(float));
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < params_->batch; i++) {
|
for (int i = 0; i < params_->batch; i++) {
|
||||||
const float *src = src_ptr + i * params_->deep_ * params_->col_;
|
const float *src = src_ptr + i * params_->deep_ * params_->col_;
|
||||||
|
|
|
@ -66,7 +66,7 @@ int ConvInit(int8_t *origin_weight, const int32_t *ori_bias, const int32_t *filt
|
||||||
}
|
}
|
||||||
memset(bias_data_, 0, bias_size);
|
memset(bias_data_, 0, bias_size);
|
||||||
if (ori_bias != NULL) {
|
if (ori_bias != NULL) {
|
||||||
memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t));
|
memcpy(bias_data_, ori_bias, (unsigned int)output_channel * sizeof(int32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int oc = 0; oc < output_channel; oc++) {
|
for (int oc = 0; oc < output_channel; oc++) {
|
||||||
|
|
|
@ -31,7 +31,7 @@ void InitInt8MatrixA(int8_t *src_ptr, int32_t *input_sums, int8_t *dst_ptr, int
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitInt8MatrixB(int8_t *weight_ptr, int32_t *weight_bias_sums_batch_, int8_t *dst_ptr, int batch, int deep,
|
void InitInt8MatrixB(int8_t *weight_ptr, int32_t *weight_bias_sums_batch_, int8_t *dst_ptr, int batch, int deep,
|
||||||
int col, int col_align, int deep_16, int input_zp, int *weight_zp, const int *bias_ptr,
|
int col, int col_align, int deep_16, int input_zp, const int *weight_zp, const int *bias_ptr,
|
||||||
bool b_transpose, bool filter_per_channel) {
|
bool b_transpose, bool filter_per_channel) {
|
||||||
for (int i = 0; i < batch; ++i) {
|
for (int i = 0; i < batch; ++i) {
|
||||||
int8_t *cur_b = weight_ptr + i * deep * col;
|
int8_t *cur_b = weight_ptr + i * deep * col;
|
||||||
|
|
|
@ -26,7 +26,7 @@ void InitInt8MatrixA(int8_t *src_ptr, int32_t *input_sums, int8_t *dst_ptr, int
|
||||||
const int *weight_zp, bool a_transpose);
|
const int *weight_zp, bool a_transpose);
|
||||||
|
|
||||||
void InitInt8MatrixB(int8_t *weight_ptr, int32_t *weight_bias_sums_batch_, int8_t *dst_ptr, int batch, int deep,
|
void InitInt8MatrixB(int8_t *weight_ptr, int32_t *weight_bias_sums_batch_, int8_t *dst_ptr, int batch, int deep,
|
||||||
int col, int col_align, int deep_16, int input_zp, int *weight_zp, const int *bias_ptr,
|
int col, int col_align, int deep_16, int input_zp, const int *weight_zp, const int *bias_ptr,
|
||||||
bool b_transpose, bool filter_per_channel);
|
bool b_transpose, bool filter_per_channel);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
Loading…
Reference in New Issue