remove task_id && fix warning
This commit is contained in:
parent
d4a0df8a94
commit
7ffb4b12aa
|
@ -60,7 +60,7 @@ void CodeSessionCompileGraph(std::ofstream &ofs, const std::unique_ptr<CoderCont
|
|||
ofs << " MS_ERROR_IF_NULL(outputs_[" << i << "]);\n";
|
||||
}
|
||||
if (config->target() != kARM32M) {
|
||||
ofs << " int ret = Init(model->buf, dynamic_cast<MModel *>(model)->buf_size());\n"
|
||||
ofs << " int ret = Init(model->buf, static_cast<MModel *>(model)->buf_size());\n"
|
||||
" return ret;\n"
|
||||
"}\n\n";
|
||||
return;
|
||||
|
|
|
@ -188,7 +188,6 @@ mindspore::tensor::MSTensor *LiteSession::GetOutputByTensorName(const String &te
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace lite
|
||||
)RAW";
|
||||
|
||||
|
|
|
@ -79,7 +79,6 @@ class MTensor : public mindspore::tensor::MSTensor {
|
|||
void *data_ = nullptr;
|
||||
Vector<QuantArg> quant_params_;
|
||||
};
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ int CoderGraph::ConvertTensors() {
|
|||
auto data_size = static_cast<size_t>(origin_tensor->data()->size());
|
||||
MS_CHECK_RET_CODE_WITH_EXE(dstTensor->MallocData(), "dst tensor malloc data failed!", delete dstTensor);
|
||||
void *dst_data = dstTensor->data_c();
|
||||
MS_CHECK_RET_CODE_WITH_EXE(memcpy_s(dst_data, data_size, origin_tensor->data()->data(), data_size),
|
||||
MS_CHECK_RET_CODE_WITH_EXE(memcpy_s(dst_data, dstTensor->Size(), origin_tensor->data()->data(), data_size),
|
||||
"memcpy_s copy data failed!", delete dstTensor);
|
||||
dstTensor->set_data(dst_data);
|
||||
}
|
||||
|
|
|
@ -15,11 +15,10 @@
|
|||
*/
|
||||
|
||||
#include "coder/opcoders/base/detection_post_process_base_coder.h"
|
||||
|
||||
#include "nnacl/int8/quant_dtype_cast_int8.h"
|
||||
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::lite::micro {
|
||||
|
@ -143,10 +142,9 @@ int DetectionPostProcessBaseCoder::DoCode(CoderContext *const context) {
|
|||
code.CodeFunction("DetectionPostProcessRegular", num_boxes_, num_classes_with_bg_, input_scores_, output_boxes,
|
||||
output_classes, output_scores, output_num, "PartialArgSort", "&params");
|
||||
} else {
|
||||
int task_id = 0;
|
||||
int thread_num = 1;
|
||||
code.CodeFunction("NmsMultiClassesFastCore", num_boxes_, num_classes_with_bg_, input_scores_, "PartialArgSort",
|
||||
"&params", task_id, thread_num);
|
||||
"&params", kDefaultTaskId, thread_num);
|
||||
|
||||
code.CodeFunction("DetectionPostProcessFast", num_boxes_, num_classes_with_bg_, input_scores_,
|
||||
"(float *)(params.decoded_boxes_)", output_boxes, output_classes, output_scores, output_num,
|
||||
|
|
|
@ -16,9 +16,10 @@
|
|||
|
||||
#include <string>
|
||||
#include "coder/opcoders/op_coder.h"
|
||||
#include "micro/coder/opcoders/file_collector.h"
|
||||
#include "micro/coder/opcoders/base/dtype_cast_coder.h"
|
||||
#include "micro/coder/opcoders/serializers/serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/base/dtype_cast_coder.h"
|
||||
#include "coder/opcoders/serializers/serializer.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_Cast;
|
||||
namespace mindspore::lite::micro {
|
||||
|
@ -35,8 +36,7 @@ int DTypeCastCoder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int DTypeCastCoder::DoCode(CoderContext *const context) {
|
||||
int task_id = 0;
|
||||
int data_num = MSMIN(stride_, data_num_ - task_id * stride_);
|
||||
int data_num = MSMIN(stride_, data_num_ - kDefaultTaskId * stride_);
|
||||
if (data_num <= 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <string>
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
using mindspore::schema::PrimitiveType_StridedSlice;
|
||||
|
||||
namespace mindspore::lite::micro {
|
||||
|
@ -95,9 +96,8 @@ int StridedSliceBaseCoder::Prepare(CoderContext *context) {
|
|||
int StridedSliceBaseCoder::DoFastCode(CoderContext *ctx) {
|
||||
std::vector<int> in_shape = input_tensor_->shape();
|
||||
std::vector<int> out_shape = output_tensor_->shape();
|
||||
int task_id = 0;
|
||||
int begin_index = strided_slice_parameter_->begins_[split_axis_];
|
||||
int caled_num = task_id * cal_num_per_thread_;
|
||||
int caled_num = kDefaultTaskId * cal_num_per_thread_;
|
||||
nnacl::NNaclFp32Serializer code;
|
||||
std::string input_ptr_str = allocator_->GetRuntimeAddr(input_tensor_);
|
||||
std::string output_ptr_str = allocator_->GetRuntimeAddr(output_tensor_);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "nnacl/op_base.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_Activation;
|
||||
|
||||
|
@ -27,11 +28,10 @@ namespace mindspore::lite::micro::nnacl {
|
|||
int ActivationFP32Coder::DoCode(CoderContext *const context) {
|
||||
// attribute
|
||||
auto *activation_parameter = reinterpret_cast<ActivationParameter *>(parameter_);
|
||||
int task_id = 0;
|
||||
int length = input_tensor_->ElementsNum();
|
||||
MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
|
||||
int stride = UP_DIV(length, thread_num_);
|
||||
int count = MSMIN(stride, length - stride * task_id);
|
||||
int count = MSMIN(stride, length - stride * kDefaultTaskId);
|
||||
|
||||
Collect(context,
|
||||
{
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "nnacl/fp32/arithmetic_fp32.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
|
||||
|
@ -62,8 +63,7 @@ int ArithmeticSelfFP32Coder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int ArithmeticSelfFP32Coder::DoCode(CoderContext *const context) {
|
||||
int task_id = 0;
|
||||
int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_));
|
||||
int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - kDefaultTaskId * thread_sz_stride_));
|
||||
|
||||
MS_CHECK_TRUE(!arithmetic_self_run_.empty(), "arithmetic_run function is nullptr!");
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "nnacl/fp32/batchnorm_fp32.h"
|
||||
#include "nnacl/op_base.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_BatchNorm;
|
||||
|
@ -45,7 +46,6 @@ int BatchnormFP32Coder::Init() {
|
|||
|
||||
int BatchnormFP32Coder::DoCode(CoderContext *const context) {
|
||||
// attribute
|
||||
int task_id = 0;
|
||||
auto bn_parameter = reinterpret_cast<BatchNormParameter *>(parameter_);
|
||||
if (Init() != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormFP32Coder Init error";
|
||||
|
@ -63,7 +63,8 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) {
|
|||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeStruct("bn_parameter", *bn_parameter);
|
||||
code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", task_id, output_tensor_);
|
||||
code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", kDefaultTaskId,
|
||||
output_tensor_);
|
||||
MS_LOG(INFO) << "BatchnormFP32Code has been called";
|
||||
context->AppendCode(code.str());
|
||||
return lite::RET_OK;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
|
||||
#include <string>
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
|
||||
|
@ -39,11 +40,10 @@ int ConvolutionDepthwiseFP32Coder::InitWeightBias() {
|
|||
MS_CHECK_PTR(packed_weight_);
|
||||
MS_CHECK_RET_CODE(memset_s(packed_weight_, packed_weight_data_size, 0, packed_weight_data_size),
|
||||
"memset packed weight failed!");
|
||||
PackNCHWToNHWCFp32(origin_weight, packed_weight_, 1, filter_tensor_->Height() * filter_tensor_->Width(), channel, 0,
|
||||
0);
|
||||
PackNCHWToNHWCFp32(origin_weight, packed_weight_, 1, filter_tensor_->Height() * filter_tensor_->Width(), channel,
|
||||
kDefaultTaskId, 0);
|
||||
|
||||
auto channel_size = static_cast<size_t>(channel);
|
||||
auto bias_size = static_cast<size_t>(channel_size * sizeof(float));
|
||||
auto bias_size = static_cast<size_t>(channel * sizeof(float));
|
||||
bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, bias_size, kOfflinePackWeight));
|
||||
MS_CHECK_PTR(bias_);
|
||||
MS_CHECK_RET_CODE(memset_s(bias_, bias_size, 0, bias_size), "memset bias failed!");
|
||||
|
@ -51,9 +51,7 @@ int ConvolutionDepthwiseFP32Coder::InitWeightBias() {
|
|||
if (input_tensors_.size() == kInputSize2) {
|
||||
auto *ori_bias = reinterpret_cast<float *>(bias_tensor_->data_c());
|
||||
MS_CHECK_TRUE(bias_tensor_->ElementsNum() > 0, "invalid bias length");
|
||||
MS_CHECK_RET_CODE(memcpy_s(bias_, static_cast<size_t>(bias_tensor_->ElementsNum() * sizeof(float)), ori_bias,
|
||||
static_cast<size_t>(bias_tensor_->ElementsNum() * sizeof(float))),
|
||||
"memcpy_s bias failed!");
|
||||
MS_CHECK_RET_CODE(memcpy_s(bias_, bias_size, ori_bias, bias_tensor_->Size()), "memcpy_s bias failed!");
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -76,8 +74,7 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) {
|
|||
nnacl::NNaclFp32Serializer code;
|
||||
// call the op function
|
||||
code.CodeStruct("conv_parameter", *conv_param_);
|
||||
int task_id = 0;
|
||||
code.CodeFunction("ConvDw", output_tensor_, input_tensor_, packed_weight_, bias_, "&conv_parameter", task_id);
|
||||
code.CodeFunction("ConvDw", output_tensor_, input_tensor_, packed_weight_, bias_, "&conv_parameter", kDefaultTaskId);
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -161,8 +161,7 @@ int ConvolutionWinogradFP32Coder::InitWeightBias() {
|
|||
MS_CHECK_RET_CODE(ret, "memset_s failed!");
|
||||
if (input_tensors_.size() == kInputSize2) {
|
||||
auto ori_bias_addr = reinterpret_cast<float *>(bias_tensor_->data_c());
|
||||
MS_CHECK_RET_CODE(memcpy_s(new_bias_, static_cast<size_t>(out_channel_size * sizeof(float)), ori_bias_addr,
|
||||
static_cast<size_t>(out_channel_size * sizeof(float))),
|
||||
MS_CHECK_RET_CODE(memcpy_s(new_bias_, new_bias_ele_size, ori_bias_addr, out_channel_size * sizeof(float)),
|
||||
"memset_s failed!");
|
||||
} else {
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <math.h>
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
using mindspore::schema::PrimitiveType_ExpFusion;
|
||||
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
|
@ -48,9 +49,8 @@ int ExpFP32Coder::DoCode(CoderContext *ctx) {
|
|||
"exp_fp32.c",
|
||||
});
|
||||
nnacl::NNaclFp32Serializer code;
|
||||
int task_id = 0;
|
||||
code.CodeStruct("exp_parameter", *exp_parameter_);
|
||||
code.CodeFunction("Exp", input_tensor_, "(ExpParameter *)&exp_parameter", task_id);
|
||||
code.CodeFunction("Exp", input_tensor_, "(ExpParameter *)&exp_parameter", kDefaultTaskId);
|
||||
ctx->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "nnacl/gather_parameter.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_Gather;
|
||||
|
@ -55,10 +56,9 @@ int GatherFP32Coder::DoCode(CoderContext *context) {
|
|||
for (int i = axis + 1; i < in_rank; ++i) {
|
||||
inner_size *= in_shape.at(i);
|
||||
}
|
||||
int task_id = 0;
|
||||
MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
|
||||
int stride = UP_DIV(outer_size, thread_num_);
|
||||
int count = MSMIN(stride, outer_size - stride * task_id);
|
||||
int count = MSMIN(stride, outer_size - stride * kDefaultTaskId);
|
||||
|
||||
// call the op function
|
||||
if (input0->data_type() == kNumberTypeInt32) {
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "nnacl/fp32/matmul_fp32.h"
|
||||
|
@ -202,9 +203,8 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
|
|||
code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
|
||||
}
|
||||
|
||||
int task_id = 0;
|
||||
int current_stride_oc = thread_stride_ * col_tile_;
|
||||
int current_rest_oc = params_->col_ - task_id * thread_stride_ * col_tile_;
|
||||
int current_rest_oc = params_->col_ - kDefaultTaskId * thread_stride_ * col_tile_;
|
||||
int cur_oc = MSMIN(current_stride_oc, current_rest_oc);
|
||||
if (cur_oc <= 0) {
|
||||
return RET_OK;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "coder/log.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_PadFusion;
|
||||
|
||||
|
@ -79,7 +80,6 @@ int PadFP32Coder::ExtendPaddings(int *paddings, int length, const int *ori_paddi
|
|||
}
|
||||
|
||||
int PadFP32Coder::DoCode(CoderContext *const context) {
|
||||
int task_id = thread_num_ - 1;
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/pad.h",
|
||||
|
@ -101,7 +101,7 @@ int PadFP32Coder::DoCode(CoderContext *const context) {
|
|||
std::vector<float> constant_values(output_size, pad_param_->constant_value_);
|
||||
code.CodeArray("output_tensor_", constant_values.data(), output_size);
|
||||
}
|
||||
code.CodeFunction("Pad", input_tensor_, output_tensor_, "in_", "out_", "padding_", task_id, thread_num_);
|
||||
code.CodeFunction("Pad", input_tensor_, output_tensor_, "in_", "out_", "padding_", kDefaultTaskId, thread_num_);
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "nnacl/fp32/pooling_fp32.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_AvgPoolFusion;
|
||||
|
@ -29,7 +30,6 @@ namespace mindspore::lite::micro::nnacl {
|
|||
int PoolingFP32Coder::DoCode(CoderContext *const context) {
|
||||
// attribute
|
||||
auto pooling_parameter = reinterpret_cast<PoolingParameter *>(parameter_);
|
||||
int task_id = 0;
|
||||
// init struct PoolingParameters
|
||||
pooling_parameter->input_batch_ = input_tensor_->Batch();
|
||||
pooling_parameter->input_channel_ = input_tensor_->Channel();
|
||||
|
@ -70,7 +70,7 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
|
|||
}
|
||||
}
|
||||
|
||||
code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
|
||||
code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", kDefaultTaskId, minf, maxf);
|
||||
} else {
|
||||
Collect(context,
|
||||
{
|
||||
|
@ -94,7 +94,7 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
code.CodeFunction("AvgPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
|
||||
code.CodeFunction("AvgPooling", input_tensor_, output_tensor_, "&pooling_parameter", kDefaultTaskId, minf, maxf);
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "PoolingFp32Code has been called";
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_PowFusion;
|
||||
|
||||
|
@ -31,10 +32,9 @@ int PowerFP32Coder::DoCode(CoderContext *const context) {
|
|||
Tensor *filter_tensor = input_tensors_.at(kWeightIndex);
|
||||
MS_CHECK_PTR(filter_tensor);
|
||||
int size = input_tensor_->ElementsNum();
|
||||
int task_id = 0;
|
||||
MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
|
||||
int stride = UP_DIV(size, thread_num_);
|
||||
int len = MSMIN(stride, size - stride * task_id);
|
||||
int len = MSMIN(stride, size - stride * kDefaultTaskId);
|
||||
std::string exp_addr;
|
||||
bool broadcast = true;
|
||||
if (input_tensors_.size() == 2) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <string>
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_Transpose;
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
|
@ -72,8 +73,7 @@ int TransposeFp32Coder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int TransposeFp32Coder::DoCode(CoderContext *const context) {
|
||||
int task_id = 0;
|
||||
int num_unit_thread = MSMIN(thread_h_stride_, num_unit_ - task_id * thread_h_stride_);
|
||||
int num_unit_thread = MSMIN(thread_h_stride_, num_unit_ - kDefaultTaskId * thread_h_stride_);
|
||||
if (num_unit_thread <= 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -92,7 +92,8 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeStruct("transpose_parameter", *transpose_parameter_);
|
||||
|
||||
code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, in_shape_, out_shape_,
|
||||
"(TransposeParameter *)&transpose_parameter", task_id, num_unit_thread, dim_size_, position_);
|
||||
"(TransposeParameter *)&transpose_parameter", kDefaultTaskId, num_unit_thread, dim_size_,
|
||||
position_);
|
||||
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
|
|
|
@ -68,9 +68,8 @@ int Conv2D3x3Int8Coder::InitWeightBias() {
|
|||
MS_CHECK_RET_CODE(memset_s(new_bias_addr_, new_bias_size, 0, new_bias_size), "memset_s new_bias_addr_ failed.");
|
||||
if (input_tensors_.size() == kInputSize2) {
|
||||
auto *ori_bias_addr = reinterpret_cast<int32_t *>(bias_tensor_->data_c());
|
||||
MS_CHECK_RET_CODE(
|
||||
memcpy_s(new_bias_addr_, output_channel * sizeof(int32_t), ori_bias_addr, output_channel * sizeof(int32_t)),
|
||||
"memset_s new_bias_addr_ failed.");
|
||||
MS_CHECK_RET_CODE(memcpy_s(new_bias_addr_, new_bias_size, ori_bias_addr, output_channel * sizeof(int32_t)),
|
||||
"memset_s new_bias_addr_ failed.");
|
||||
} else {
|
||||
MS_ASSERT(input_tensors_.size() == kInputSize1);
|
||||
}
|
||||
|
@ -157,9 +156,8 @@ int Conv2D3x3Int8Coder::DoCode(CoderContext *const context) {
|
|||
output_tensor_, tile_buffer_, block_unit_buffer_, tmp_dst_buffer_, tmp_out_, "&conv_param_");
|
||||
code.CodeFunction(kParallelLaunch, "THREAD_POOL_DEFAULT", "Conv3x3Int8Run", kRunArgsAddr, "thread_num");
|
||||
} else {
|
||||
int task_id = 0;
|
||||
code.CodeFunction("Conv3x3Int8", c8_input_, transformed_filter_addr_, new_bias_addr_, output_tensor_, tile_buffer_,
|
||||
block_unit_buffer_, tmp_dst_buffer_, tmp_out_, task_id, "&conv_param_");
|
||||
block_unit_buffer_, tmp_dst_buffer_, tmp_out_, kDefaultTaskId, "&conv_param_");
|
||||
}
|
||||
code.CodeFunction("PackNC4HW4ToNHWCInt8", tmp_out_, output_tensor_, conv_param_->output_batch_,
|
||||
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
|
||||
|
|
|
@ -71,8 +71,7 @@ int DeconvolutionInt8Coder::InitBiasWeight(CoderContext *const context) {
|
|||
MS_CHECK_RET_CODE(memset_s(bias_data_, size, 0, size), "memset_s new_bias_addr_ failed.");
|
||||
if (input_tensors_.size() == kInputSize2) {
|
||||
auto *ori_bias_addr = reinterpret_cast<int32_t *>(bias_tensor_->data_c());
|
||||
MS_CHECK_RET_CODE(memcpy_s(bias_data_, conv_param_->output_channel_ * sizeof(int32_t), ori_bias_addr,
|
||||
conv_param_->output_channel_ * sizeof(int32_t)),
|
||||
MS_CHECK_RET_CODE(memcpy_s(bias_data_, size, ori_bias_addr, bias_tensor_->Size()),
|
||||
"memcpy_s new_bias_addr_ failed.");
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <string>
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
|
||||
int MatMulBaseInt8Coder::ReSize(CoderContext *const context) {
|
||||
|
@ -209,7 +210,6 @@ int MatMulBaseInt8Coder::DoCode(CoderContext *const context) {
|
|||
param_->col_, param_->col_align_, param_->deep_16_, quant_.input_.zp_, "init_filter_zp",
|
||||
bias_ptr_, param_->b_transpose_, filter_per_channel_);
|
||||
}
|
||||
int task_id = 0;
|
||||
std::string a_ptr_str = allocator_->GetRuntimeAddr(input_tensor_);
|
||||
std::string c_ptr_str = allocator_->GetRuntimeAddr(output_tensor_);
|
||||
std::string pack_b_ptr_str = allocator_->GetRuntimeAddr(pack_b_ptr_);
|
||||
|
@ -233,7 +233,7 @@ int MatMulBaseInt8Coder::DoCode(CoderContext *const context) {
|
|||
std::string batch_c_ptr_str = c_ptr_str + "+" + std::to_string(i * param_->row_ * param_->col_);
|
||||
|
||||
int stride = thread_stride_ * col_tile_;
|
||||
int cur_stride = task_id * stride;
|
||||
int cur_stride = kDefaultTaskId * stride;
|
||||
int res_stride = param_->col_ - cur_stride;
|
||||
int cur_oc = MSMIN(stride, res_stride);
|
||||
if (cur_oc <= 0) {
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <string>
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
|
||||
|
||||
using mindspore::schema::PrimitiveType_ReduceFusion;
|
||||
|
@ -190,7 +191,6 @@ int ReduceInt8Coder::Prepare(CoderContext *const context) {
|
|||
|
||||
int ReduceInt8Coder::DoCode(CoderContext *const context) {
|
||||
MS_LOG(DEBUG) << "*****Reduce code start*****";
|
||||
int task_id = 0;
|
||||
NNaclInt8Serializer code;
|
||||
Collect(context,
|
||||
{
|
||||
|
@ -226,10 +226,10 @@ int ReduceInt8Coder::DoCode(CoderContext *const context) {
|
|||
axis_size_ = axis_sizes_.at(i);
|
||||
if (!is_last_axis) {
|
||||
code.CodeFunction(reducer_, outer_size_, inner_size_, axis_size_, begin_src_data_src, dst_addr, ptr_quan_arg_i,
|
||||
task_id, thread_num_);
|
||||
kDefaultTaskId, thread_num_);
|
||||
} else {
|
||||
code.CodeFunction(last_reducer_, outer_size_, inner_size_, axis_size_, begin_src_data_src, dst_addr,
|
||||
ptr_quan_arg_i, task_id, thread_num_);
|
||||
ptr_quan_arg_i, kDefaultTaskId, thread_num_);
|
||||
}
|
||||
begin_src_data_src = dst_addr;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ bool GetSupportOptFlag() {
|
|||
#ifdef ENABLE_ARM64
|
||||
int hwcap_type = 16;
|
||||
// getHwCap
|
||||
uint32_t hwcap = getauxval(hwcap_type);
|
||||
const uint32_t hwcap = getauxval(hwcap_type);
|
||||
if (hwcap & HWCAP_ASIMDDP) {
|
||||
status = true;
|
||||
} else {
|
||||
|
|
|
@ -34,7 +34,7 @@ static void heapify(const float *scores, int *indexes, int n, int i) {
|
|||
while (i < n) {
|
||||
int cur = i;
|
||||
int l = 2 * i + 1;
|
||||
int r = 2 * i + 2;
|
||||
const int r = 2 * i + 2;
|
||||
if (r < n && compare(indexes[cur], indexes[r], scores)) {
|
||||
cur = r;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
#include <string.h>
|
||||
void DequantDataPerChannel(const int8_t *quant_src, const DeQuantArg **de_quant_args, size_t de_quant_nums,
|
||||
size_t per_batch_size, float *de_quant_dst) {
|
||||
if (per_batch_size == 0) {
|
||||
return;
|
||||
}
|
||||
size_t matrix_size = de_quant_nums / per_batch_size;
|
||||
for (int i = 0; i < per_batch_size; i++) {
|
||||
const DeQuantArg *de_quant_arg = de_quant_args[i];
|
||||
|
@ -32,6 +35,9 @@ void DequantDataPerChannel(const int8_t *quant_src, const DeQuantArg **de_quant_
|
|||
|
||||
void DequantData(const int8_t *quant_src, const DeQuantArg **de_quant_args, size_t de_quant_nums, size_t channels,
|
||||
float *de_quant_dst) {
|
||||
if (channels == 0) {
|
||||
return;
|
||||
}
|
||||
size_t per_channel_size = de_quant_nums / channels;
|
||||
for (size_t i = 0; i < channels; i++) {
|
||||
const DeQuantArg *de_quant_arg = de_quant_args[i];
|
||||
|
@ -57,7 +63,7 @@ void DequantDataPerTensor(const int8_t *quant_src, const DeQuantArg **de_quant_a
|
|||
int32_t zero_point = de_quant_arg->zeroPoint;
|
||||
for (int j = 0; j < de_quant_nums; j++) {
|
||||
int8_t quant_data = quant_src[j];
|
||||
if (quant_clusters) {
|
||||
if (quant_clusters != NULL) {
|
||||
if (quant_data > INT8_MAX || quant_data < INT8_MIN) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ int LiteSession::CompileGraph(lite::Model *model) {
|
|||
out_shape_0[1] = 10;
|
||||
outputs_[0] = new (std::nothrow) MTensor(String("Softmax-7"), kNumberTypeFloat32, out_shape_0);
|
||||
MS_ERROR_IF_NULL(outputs_[0]);
|
||||
int ret = Init(model->buf, dynamic_cast<MModel *>(model)->buf_size());
|
||||
int ret = Init(model->buf, static_cast<MModel *>(model)->buf_size());
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -78,7 +78,6 @@ class LiteSession : public session::LiteSession {
|
|||
Vector<MTensor *> outputs_;
|
||||
void *runtime_buffer_;
|
||||
};
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
Loading…
Reference in New Issue