forked from mindspore-Ecosystem/mindspore

add matmul && fc (int8 && fp32)

This commit is contained in:
parent 2c22d2bb3a
commit aa168a93b3

@@ -57,7 +57,10 @@ set(CODER_OPCODERS_SRC
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/expand_dims_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/matmul_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nchw2nhwc_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nhwc2nchw_fp32_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc

@@ -73,6 +76,8 @@ set(CODER_OPCODERS_SRC
    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
    #### nnacl int8 coder
    ${MICRO_DIR}/coder/opcoders/nnacl/int8/concat_int8_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/int8/fullconnection_int8_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/int8/matmul_int8_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
    ${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc

@@ -102,6 +107,10 @@ set(LITE_SRC
    ${LITE_DIR}/src/ops/while.cc
    ### populate operator parameter
    ${LITE_DIR}/src/ops/populate/conv2d_populate.cc
    ### tools
    ${LITE_DIR}/tools/common/flag_parser.cc
)
set(LITE_KERNEL_SRC
    ### nnacl
    ${LITE_DIR}/nnacl/base/minimal_filtering_generator.c
    ${LITE_DIR}/nnacl/fp32/winograd_utils.c

@@ -110,9 +119,13 @@ set(LITE_SRC
    ${LITE_DIR}/nnacl/int8/pack_int8.c
    ${LITE_DIR}/nnacl/int8/matmul_int8.c
    ${LITE_DIR}/nnacl/int8/fixed_point.c
    ### tools
    ${LITE_DIR}/tools/common/flag_parser.cc
    ${LITE_DIR}/nnacl/fp32/matmul_fp32.c
)
set(MICRO_ADAPTER_SRC
    ${MICRO_DIR}/wrapper/fp32/matmul_fp32_wrapper.c
    ${MICRO_DIR}/wrapper/int8/matmul_int8_wrapper.c
)

list(APPEND FILE_SET ${CODER_SRC} ${CODER_UTILS_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
    ${CODER_ALLOCATOR_SRC} ${LITE_SRC})
    ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MICRO_ADAPTER_SRC})

@@ -0,0 +1,77 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/fp32/full_connection_fp32_coder.h"
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_FullConnection;

namespace mindspore::lite::micro::nnacl {
int FullConnectionFP32Coder::ReSize() {
  int row = 1;
  for (int i = 0; i < static_cast<int>(output_tensor_->shape().size() - 1); ++i) {
    row *= output_tensor_->shape().at(i);
  }
  params_->row_ = row;
  params_->col_ = output_tensor_->shape().back();
  params_->deep_ = filter_tensor_->shape().at(1);
  return MatMulFP32BaseCoder::ReSize();
}

int FullConnectionFP32Coder::Init() {
  this->params_ = reinterpret_cast<MatMulParameter *>(parameter_);
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  if (input_tensors_.size() == kInputSize2) {
    bias_tensor_ = input_tensors_.at(kBiasIndex);
    MS_CHECK_PTR(bias_tensor_);
    MS_CHECK_PTR(bias_tensor_->data_c());
  }
  params_->a_const_ = (input_tensor_->data_c() != nullptr);
  params_->b_const_ = (filter_tensor_->data_c() != nullptr);
  MatMulFP32BaseCoder::InitParameter();
  if (params_->a_const_) {
    std::vector<int> a_shape = input_tensor_->shape();
    params_->row_ = a_shape.at(0);
    params_->deep_ = a_shape.at(1);
  }

  if (params_->b_const_) {
    std::vector<int> b_shape = filter_tensor_->shape();
    params_->col_ = b_shape.at(0);
    params_->deep_ = b_shape.at(1);
  }
  params_->batch = 1;
  params_->a_transpose_ = false;
  params_->b_transpose_ = true;
  MS_CHECK_RET_CODE(MatMulFP32BaseCoder::Init(), "MatMulFP32BaseCoder init failed");
  if (params_->row_ == 1 && !params_->b_const_) {
    vec_matmul_ = true;
  }
  return RET_OK;
}

int FullConnectionFP32Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(Init(), "FullConnectionFP32Coder Init failed");
  return ReSize();
}

int FullConnectionFP32Coder::DoCode(CoderContext *const context) { return MatMulFP32BaseCoder::DoCode(context); }

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_FullConnection,
                   CPUOpCoderCreator<FullConnectionFP32Coder>)
}  // namespace mindspore::lite::micro::nnacl

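Note: ReSize() above derives the matmul shape from the FC tensors: every output dimension except the last folds into row_, the last output dimension is col_, and the weight is taken as [col_, deep_] with b_transpose_ = true (C = A * B^T). A standalone sketch of that mapping, using hypothetical shapes rather than anything from this commit:

#include <cassert>
#include <vector>

int main() {
  // Hypothetical FC: input [2, 32], weight [10, 32], output [2, 10].
  std::vector<int> out_shape = {2, 10};
  std::vector<int> w_shape = {10, 32};
  int row = 1;
  for (size_t i = 0; i + 1 < out_shape.size(); ++i) row *= out_shape[i];
  int col = out_shape.back();  // last output dim
  int deep = w_shape.at(1);    // weight stored as [col, deep]
  assert(row == 2 && col == 10 && deep == 32);
  return 0;
}
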
@@ -0,0 +1,47 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_FULL_CONNECTION_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_FULL_CONNECTION_FP32_CODER_H_

#include <memory>
#include <string>
#include <vector>
#include <functional>
#include "coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

namespace mindspore::lite::micro::nnacl {

class FullConnectionFP32Coder final : public MatMulFP32BaseCoder {
 public:
  FullConnectionFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                          const Model::Node *node, size_t node_index, Target target)
      : MatMulFP32BaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

  ~FullConnectionFP32Coder() override = default;

 private:
  int Init() override;
  int ReSize() override;
};

}  // namespace mindspore::lite::micro::nnacl

#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_FULL_CONNECTION_FP32_CODER_H_

@@ -0,0 +1,190 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h"
#include <string>
#include <vector>
#include "coder/log.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"
#include "nnacl/fp32/matmul_fp32.h"
#include "wrapper/fp32/matmul_fp32_wrapper.h"

using mindspore::schema::PrimitiveType_MatMul;

namespace mindspore::lite::micro::nnacl {

int MatMulFP32BaseCoder::ReSize() {
  ResizeParameter();
  thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_align_, col_tile_));
  thread_stride_ = UP_DIV(UP_DIV(params_->col_align_, col_tile_), thread_count_);
  return RET_OK;
}

int MatMulFP32BaseCoder::InitBiasData() {
  if (input_tensors_.size() == 3) {
    int max_bias_data = UP_ROUND(bias_tensor_->ElementsNum(), C16NUM);
    bias_pack_ptr_size_ = static_cast<size_t>(max_bias_data * sizeof(float));
    bias_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(bias_ptr_);
  }
  return RET_OK;
}

void MatMulFP32BaseCoder::InitParameter() {
  if (target_ == kARM32A) {
    row_tile_ = C12NUM;
    col_tile_ = C4NUM;
  } else {
    row_tile_ = C12NUM;
    col_tile_ = C8NUM;
  }
}

void MatMulFP32BaseCoder::ResizeParameter() {
  if (params_->row_ == 1 && !params_->b_const_) {
    vec_matmul_ = true;
  }
  params_->row_align_ = vec_matmul_ ? 1 : UP_ROUND(params_->row_, row_tile_);
  params_->col_align_ = vec_matmul_ ? params_->col_ : UP_ROUND(params_->col_, col_tile_);
}

int MatMulFP32BaseCoder::InitBufferA() {
  if (a_pack_ptr_ != nullptr) {
    return RET_OK;
  }
  if (params_->a_const_) {
    a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
  } else {
    a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float));
    a_pack_ptr_ =
      reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kOfflinePackWeight));
  }
  MS_CHECK_PTR(a_pack_ptr_);
  return RET_OK;
}

int MatMulFP32BaseCoder::InitBufferB() {
  if (b_pack_ptr_ != nullptr) {
    return RET_OK;
  }
  if (params_->b_const_) {
    b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
  } else {
    b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float));
    b_pack_ptr_ =
      reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kOfflinePackWeight));
  }
  MS_CHECK_PTR(b_pack_ptr_);
  return RET_OK;
}

int MatMulFP32BaseCoder::InitMatrixA(const float *src_ptr) {
  ::InitMatrixA(src_ptr, a_pack_ptr_, params_, vec_matmul_);
  return RET_OK;
}

int MatMulFP32BaseCoder::InitMatrixB(const float *src_ptr) {
  ::InitMatrixB(src_ptr, b_pack_ptr_, params_, vec_matmul_);
  return RET_OK;
}

int MatMulFP32BaseCoder::Init() {
  thread_count_ = thread_num_;
  ResizeParameter();
  MS_CHECK_RET_CODE(InitBiasData(), "InitBiasData failed");
  if (params_->a_const_) {
    MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed");
    MS_CHECK_RET_CODE(InitMatrixA(reinterpret_cast<float *>(input_tensor_->data_c())), "InitMatrixA failed");
  }

  if (params_->b_const_) {
    MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed");
    MS_CHECK_RET_CODE(InitMatrixB(reinterpret_cast<float *>(filter_tensor_->data_c())), "InitMatrixB failed");
  }
  return RET_OK;
}

int MatMulFP32BaseCoder::Prepare(CoderContext *const context) { return RET_OK; }

int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
  // generate code .h .c
  std::vector<std::string> asm_files;
  if (target_ == kARM32A) {
    asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S"};
  } else if (target_ == kARM64) {
    asm_files = {"arm64/MatmulFp32.S", "MatmulFp32Opt.S", "arm64/MatVecMulFp32.S"};
  }
  Collect(context, {"nnacl/fp32/matmul.h", "adapter/fp32/matmul_fp32_adapter.h"}, {"matmul.c", "matmul_fp32_adapter.c"},
          asm_files);
  NNaclFp32Serializer code;
  NNaclFp32Serializer init_code;
  code.CodeStruct("mat_mul_parameter", *params_);
  init_code.CodeStruct("mat_mul_parameter", *params_);
  // do bias packing to init
  if (bias_ptr_) {
    init_code.CodeMallocExpression(bias_ptr_, bias_pack_ptr_size_);
    init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_->data_c(), bias_pack_ptr_size_);
  }

  std::string c_str = allocator_->GetRuntimeAddr(output_tensor_);
  std::string a_pack_str = allocator_->GetRuntimeAddr(a_pack_ptr_);
  std::string b_pack_str = allocator_->GetRuntimeAddr(b_pack_ptr_);

  // do const value packing to init
  if (!params_->a_const_) {
    code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
    // b_pack_str has been memset, no need to memset
    init_code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
  }
  if (!params_->b_const_) {
    // a_pack_str has been memset, no need to memset
    init_code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
    code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
  }

  int task_id = 0;
  int current_stride_oc = thread_stride_ * col_tile_;
  int current_rest_oc = params_->col_ - task_id * thread_stride_ * col_tile_;
  int cur_oc = MSMIN(current_stride_oc, current_rest_oc);
  if (cur_oc <= 0) {
    return RET_OK;
  }
  code << "for (int i = 0; i < " << params_->batch << "; ++i) {\n";
  if (vec_matmul_) {
    code << "\t\tbatch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n";
    code << "\t\tbatch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n";
    code << "\t\tbatch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
  } else {
    code << "\t\tbatch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_ << ";\n";
    code << "\t\tbatch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_ << ";\n";
    code << "\t\tbatch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
  }

  if (vec_matmul_) {
    code.CodeFunction("MatVecMulFp32", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_, params_->act_type_,
                      params_->deep_, cur_oc);
  } else {
    code.CodeFunction("MatMulOpt", "batch_a_ptr", "batch_b_ptr", "batch_c_ptr", bias_ptr_, params_->act_type_,
                      params_->deep_, params_->row_, cur_oc, params_->col_, "OutType_Nhwc");
  }
  code << "\t\t}\n";

  context->AppendCode(code.str());
  context->AppendInitCode(init_code.str());
  return RET_OK;
}
}  // namespace mindspore::lite::micro::nnacl

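Note: the tiling arithmetic in ReSize()/ResizeParameter() leans on nnacl's UP_DIV and UP_ROUND macros; a minimal sketch of the column partitioning, assuming the usual definitions UP_DIV(x, y) == (x + y - 1) / y and UP_ROUND(x, y) == UP_DIV(x, y) * y (an assumption, not quoted from nnacl here):

#include <algorithm>
#include <cstdio>

static int UpDiv(int x, int y) { return (x + y - 1) / y; }    // ceiling division
static int UpRound(int x, int y) { return UpDiv(x, y) * y; }  // round up to a multiple

int main() {
  const int col = 30, col_tile = 8, thread_num = 4;  // hypothetical sizes
  const int col_align = UpRound(col, col_tile);      // 32: padded column count
  const int blocks = UpDiv(col_align, col_tile);     // 4 tiles of 8 columns
  const int thread_count = std::min(thread_num, blocks);
  const int thread_stride = UpDiv(blocks, thread_count);  // tiles per thread
  std::printf("col_align=%d blocks=%d thread_count=%d thread_stride=%d\n", col_align, blocks, thread_count,
              thread_stride);
  return 0;
}
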
@@ -0,0 +1,70 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_BASE_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_BASE_CODER_H_

#include <vector>
#include "coder/opcoders/op_coder.h"
#include "nnacl/matmul_parameter.h"

namespace mindspore::lite::micro::nnacl {
class MatMulFP32BaseCoder : public OperatorCoder {
 public:
  MatMulFP32BaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                      const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~MatMulFP32BaseCoder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

  virtual int ReSize();

 private:
  void ResizeParameter();
  int InitBiasData();
  int InitBufferA();
  int InitBufferB();
  int InitMatrixA(const float *src_ptr);
  int InitMatrixB(const float *src_ptr);

 protected:
  virtual int Init();
  void InitParameter();

 protected:
  Tensor *filter_tensor_{nullptr};
  Tensor *bias_tensor_{nullptr};
  MatMulParameter *params_{nullptr};
  float *a_pack_ptr_{nullptr};
  float *b_pack_ptr_{nullptr};
  float *bias_ptr_{nullptr};
  bool vec_matmul_{false};

 private:
  int col_tile_{0};
  int row_tile_{0};
  int thread_stride_{0};
  int thread_count_{0};
  size_t bias_pack_ptr_size_{0};
  size_t a_pack_ptr_size_{0};
  size_t b_pack_ptr_size_{0};
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_BASE_CODER_H_

@@ -0,0 +1,92 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/fp32/matmul_fp32_coder.h"
#include <vector>
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_MatMul;

namespace mindspore::lite::micro::nnacl {

int MatMulFP32Coder::InitShapeA() {
  std::vector<int> a_shape = input_tensor_->shape();
  int a_shape_size = static_cast<int>(a_shape.size());
  if (a_shape_size < kBiasIndex) {
    MS_LOG(ERROR) << "a_shape_size is less than two";
    return RET_ERROR;
  }
  int batch = 1;
  for (int i = 0; i < a_shape_size - 2; ++i) {
    batch *= a_shape.at(i);
  }
  params_->batch = batch;
  params_->row_ = params_->a_transpose_ ? a_shape.at(a_shape_size - 1) : a_shape.at(a_shape_size - 2);
  params_->deep_ = params_->a_transpose_ ? a_shape.at(a_shape_size - 2) : a_shape.at(a_shape_size - 1);
  return RET_OK;
}

int MatMulFP32Coder::InitShapeB() {
  std::vector<int> b_shape = filter_tensor_->shape();
  int b_shape_size = static_cast<int>(b_shape.size());
  if (b_shape_size < kBiasIndex) {
    MS_LOG(ERROR) << "b_shape_size is less than two";
    return RET_ERROR;
  }
  int batch = 1;
  for (int i = 0; i < b_shape_size - 2; ++i) {
    batch *= b_shape.at(i);
  }
  params_->batch = batch;
  params_->col_ = params_->b_transpose_ ? b_shape.at(b_shape_size - 2) : b_shape.at(b_shape_size - 1);
  params_->deep_ = params_->b_transpose_ ? b_shape.at(b_shape_size - 1) : b_shape.at(b_shape_size - 2);
  return RET_OK;
}

// temporary implementation until infer-shape is guaranteed to be done before coding
int MatMulFP32Coder::ReSize() {
  MS_CHECK_RET_CODE(InitShapeA(), "MatMulFP32Coder init_shape_a failed");
  MS_CHECK_RET_CODE(InitShapeB(), "MatMulFP32Coder init_shape_b failed");
  return MatMulFP32BaseCoder::ReSize();
}

int MatMulFP32Coder::Prepare(CoderContext *const context) {
  params_ = reinterpret_cast<MatMulParameter *>(parameter_);
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  if (input_tensors_.size() == kInputSize2) {
    bias_tensor_ = input_tensors_.at(kBiasIndex);
    MS_CHECK_PTR(bias_tensor_);
    MS_CHECK_PTR(bias_tensor_->data_c());
  }
  params_->a_const_ = (input_tensor_->data_c() != nullptr);
  params_->b_const_ = (filter_tensor_->data_c() != nullptr);
  MatMulFP32BaseCoder::InitParameter();
  if (params_->a_const_) {
    MS_CHECK_RET_CODE(InitShapeA(), "InitShapeA failed");
  }
  if (params_->b_const_) {
    MS_CHECK_RET_CODE(InitShapeB(), "InitShapeB failed");
  }
  MS_CHECK_RET_CODE(MatMulFP32BaseCoder::Init(), "MatMulFP32Coder init failed");
  return ReSize();
}

int MatMulFP32Coder::DoCode(CoderContext *const context) { return MatMulFP32BaseCoder::DoCode(context); }

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_MatMul, CPUOpCoderCreator<MatMulFP32Coder>)
}  // namespace mindspore::lite::micro::nnacl

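Note: InitShapeA()/InitShapeB() above fold all leading dimensions into params_->batch and read row/deep (or col/deep) from the trailing two, honoring the transpose flags. A worked example with a hypothetical shape:

#include <cassert>
#include <vector>

int main() {
  // Hypothetical A tensor [2, 3, 4, 5] with a_transpose_ = false.
  std::vector<int> a_shape = {2, 3, 4, 5};
  int batch = 1;
  for (size_t i = 0; i + 2 < a_shape.size(); ++i) batch *= a_shape[i];
  int row = a_shape[a_shape.size() - 2];
  int deep = a_shape[a_shape.size() - 1];
  assert(batch == 6 && row == 4 && deep == 5);  // batch = 2 * 3
  return 0;
}
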
@@ -0,0 +1,43 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_CODER_H_

#include <vector>
#include "coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h"
#include "nnacl/matmul_parameter.h"

namespace mindspore::lite::micro::nnacl {
class MatMulFP32Coder final : public MatMulFP32BaseCoder {
 public:
  MatMulFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : MatMulFP32BaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~MatMulFP32Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int InitShapeA();
  int InitShapeB();
  int ReSize() override;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_MATMUL_FP32_CODER_H_

@@ -0,0 +1,235 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/int8/fullconnection_int8_coder.h"
#include "nnacl/int8/matmul_int8.h"
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"

using mindspore::schema::PrimitiveType_FullConnection;

namespace mindspore::lite::micro::nnacl {

FullConnectionInt8Coder::~FullConnectionInt8Coder() { FreeQuantParam(); }

int FullConnectionInt8Coder::MallocQuantParam() {
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  std::vector<QuantArg> weight_quant_params = filter_tensor_->quant_params();
  MS_CHECK_TRUE(!filter_tensor_->shape().empty(), "filter tensor shape is empty");
  int col = filter_tensor_->shape().front();
  filter_per_channel_ = (weight_quant_params.size() > 1);
  init_size_ = filter_per_channel_ ? col : 1;
  quant_.filter_scale_ = reinterpret_cast<float *>(malloc(init_size_ * sizeof(float)));
  MS_CHECK_PTR(quant_.filter_scale_);
  quant_.filter_zp_ = reinterpret_cast<int32_t *>(malloc(init_size_ * sizeof(int32_t)));
  MS_CHECK_PTR(quant_.filter_zp_);
  quant_.left_shift_ = reinterpret_cast<int32_t *>(malloc(init_size_ * sizeof(int32_t)));
  MS_CHECK_PTR(quant_.left_shift_);
  quant_.right_shift_ = reinterpret_cast<int32_t *>(malloc(init_size_ * sizeof(int32_t)));
  MS_CHECK_PTR(quant_.right_shift_);
  quant_.quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(init_size_ * sizeof(int32_t)));
  MS_CHECK_PTR(quant_.quant_multiplier_);
  return RET_OK;
}

void FullConnectionInt8Coder::FreeQuantParam() {
  if (quant_.filter_scale_ != nullptr) {
    free(quant_.filter_scale_);
    quant_.filter_scale_ = nullptr;
  }
  if (quant_.filter_zp_ != nullptr) {
    free(quant_.filter_zp_);
    quant_.filter_zp_ = nullptr;
  }
  if (quant_.left_shift_ != nullptr) {
    free(quant_.left_shift_);
    quant_.left_shift_ = nullptr;
  }
  if (quant_.right_shift_ != nullptr) {
    free(quant_.right_shift_);
    quant_.right_shift_ = nullptr;
  }
  if (quant_.quant_multiplier_ != nullptr) {
    free(quant_.quant_multiplier_);
    quant_.quant_multiplier_ = nullptr;
  }
}

void FullConnectionInt8Coder::InitParam() {
  int row = 1;
  int output_tensor_size = static_cast<int>(output_tensor_->shape().size());
  for (int i = 0; i < output_tensor_size - 1; ++i) {
    row *= (output_tensor_->shape()).at(i);
  }
  fc_param_->row_ = row;
  fc_param_->col_ = output_tensor_->shape().back();
  fc_param_->deep_ = filter_tensor_->shape().at(1);
  fc_param_->row_4_ = UP_ROUND(fc_param_->row_, C4NUM);
  fc_param_->col_4_ = UP_ROUND(fc_param_->col_, C4NUM);
  fc_param_->col_8_ = UP_ROUND(fc_param_->col_, C8NUM);
  fc_param_->deep_16_ = UP_ROUND(fc_param_->deep_, C16NUM);
  thread_count_ = MSMIN(thread_num_, UP_DIV(fc_param_->col_4_, C4NUM));
  thread_stride_ = UP_DIV(UP_DIV(fc_param_->col_4_, C4NUM), thread_count_);
}

int FullConnectionInt8Coder::ReSize(CoderContext *const context) {
  InitParam();
  pack_a_ptr_size_ = static_cast<size_t>(fc_param_->row_4_ * fc_param_->deep_16_ * sizeof(int8_t));
  pack_a_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, pack_a_ptr_size_, kOfflinePackWeight));
  MS_CHECK_PTR(pack_a_ptr_);
  pack_b_ptr_size_ = static_cast<size_t>(fc_param_->col_4_ * fc_param_->deep_16_ * sizeof(int8_t));
  weight_bias_sums_size_ = static_cast<size_t>(fc_param_->col_4_ * sizeof(int));
  if (fc_param_->b_const_) {
    pack_b_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(pack_b_ptr_);
    weight_bias_sums_ = reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, kOnlineSize, kOnlinePackWeight));
  } else {
    pack_b_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, pack_b_ptr_size_, kOfflinePackWeight));
    MS_CHECK_PTR(pack_b_ptr_);
    weight_bias_sums_ =
      reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, weight_bias_sums_size_, kOfflinePackWeight));
  }
  MS_CHECK_PTR(weight_bias_sums_);
  input_sums_size_ = static_cast<size_t>(fc_param_->row_4_ * sizeof(int));
  input_sums_ = reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, input_sums_size_, kOfflinePackWeight));
  MS_CHECK_PTR(input_sums_);
  if (input_tensors_.size() == kInputSize2) {
    bias_ptr_size_ = static_cast<size_t>(fc_param_->col_4_ * sizeof(int));
    bias_ptr_ = reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(bias_ptr_);
  } else {
    bias_ptr_ = nullptr;
  }
  NNaclInt8Serializer init_code;
  if (input_tensors_.size() == kInputSize2) {
    init_code.CodeMallocExpression(bias_ptr_, bias_ptr_size_);
    init_code.CodeFunction("memset", bias_ptr_, 0, bias_ptr_size_);
    init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_, bias_ptr_size_);
  }
  if (fc_param_->b_const_) {
    init_code.CodeMallocExpression(pack_b_ptr_, pack_b_ptr_size_);
    init_code.CodeMallocExpression(weight_bias_sums_, weight_bias_sums_size_);
    init_code.CodeFunction("RowMajor2Row16x4MajorInt8", filter_tensor_, pack_b_ptr_, fc_param_->col_, fc_param_->deep_);
    init_code.CodeFunction("CalcWeightBiasSums", filter_tensor_, fc_param_->deep_, fc_param_->col_, quant_.input_.zp_,
                           quant_.filter_zp_, bias_ptr_, weight_bias_sums_, ColMajor, filter_per_channel_);
  }
  context->AppendInitCode(init_code.str());
  return RET_OK;
}

int FullConnectionInt8Coder::Init() {
  fc_param_ = reinterpret_cast<MatMulParameter *>(parameter_);
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  if (input_tensors_.size() == kInputSize2) {
    bias_tensor_ = input_tensors_.at(kBiasIndex);
    MS_CHECK_PTR(bias_tensor_);
    MS_CHECK_PTR(bias_tensor_->data_c());
  }
  fc_param_->a_const_ = (input_tensor_->data_c() != nullptr);
  fc_param_->b_const_ = (filter_tensor_->data_c() != nullptr);
  int ret = MallocQuantParam();
  if (ret != RET_OK) {
    FreeQuantParam();
    return ret;
  }
  std::vector<QuantArg> in_quant_params = input_tensor_->quant_params();
  MS_CHECK_TRUE(!in_quant_params.empty(), "in_quant_params is empty");
  quant_.input_.zp_ = in_quant_params.front().zeroPoint;
  quant_.input_.scale_ = static_cast<float>(in_quant_params.front().scale);
  std::vector<QuantArg> out_quant_params = output_tensor_->quant_params();
  MS_CHECK_TRUE(!out_quant_params.empty(), "out_quant_params is empty");
  quant_.output_.zp_ = out_quant_params.front().zeroPoint;
  quant_.output_.scale_ = static_cast<float>(out_quant_params.front().scale);

  int weight_quant_num = filter_per_channel_ ? static_cast<int>(filter_tensor_->shape().front()) : 1;
  std::vector<QuantArg> weight_quant_params = filter_tensor_->quant_params();
  MS_CHECK_TRUE(!weight_quant_params.empty(), "weight_quant_params is empty");
  for (int i = 0; i < weight_quant_num; i++) {
    quant_.filter_zp_[i] = weight_quant_params[i].zeroPoint;
    quant_.filter_scale_[i] = static_cast<float>(weight_quant_params[i].scale);
  }

  for (int i = 0; i < weight_quant_num; ++i) {
    auto in_scale = static_cast<double>(quant_.input_.scale_ * quant_.filter_scale_[i]);
    double real_multiplier = in_scale / static_cast<double>(quant_.output_.scale_);
    QuantizeRoundParameterWithDoublePrecision(real_multiplier, &quant_.quant_multiplier_[i], &quant_.left_shift_[i],
                                              &quant_.right_shift_[i]);
  }
  CalculateActivationRangeQuantized(fc_param_->act_type_ == ActType_Relu, fc_param_->act_type_ == ActType_Relu6,
                                    quant_.output_.zp_, quant_.output_.scale_, &quant_.out_act_min_,
                                    &quant_.out_act_max_);
  return RET_OK;
}

int FullConnectionInt8Coder::Prepare(CoderContext *const context) {
  // only support one thread currently
  thread_count_ = thread_num_;
  MS_CHECK_RET_CODE(Init(), "FullConnectionInt8Coder init failed");
  return ReSize(context);
}

int FullConnectionInt8Coder::DoCode(CoderContext *const context) {
  Collect(context, {"nnacl/common_func.h", "nnacl/int8/common_func_int8.h", "nnacl/int8/matmul_int8.h"},
          {"common_func.c", "common_func_int8.c", "matmul_int8.c"});

  NNaclInt8Serializer code;
  code.precision(kPrecision);
  code.CodeFunction("memset", input_sums_, 0, input_sums_size_);
  code.CodeFunction("memset", pack_a_ptr_, 0, pack_a_ptr_size_);
  code.CodeFunction("RowMajor2Row16x4MajorInt8", input_tensor_, pack_a_ptr_, fc_param_->row_, fc_param_->deep_);
  int32_t tmp_weight_zp = filter_per_channel_ ? 1 : quant_.filter_zp_[0];
  code.CodeFunction("CalcInputSums", input_tensor_, fc_param_->row_, fc_param_->deep_, tmp_weight_zp, input_sums_,
                    RowMajor);

  if (!fc_param_->b_const_) {
    code.CodeFunction("memset", pack_b_ptr_, 0, pack_b_ptr_size_);
    code.CodeFunction("memset", weight_bias_sums_, 0, weight_bias_sums_size_);
    code.CodeFunction("RowMajor2Row16x4MajorInt8", filter_tensor_, pack_b_ptr_, fc_param_->col_, fc_param_->deep_);
    code.CodeFunction("CalcWeightBiasSums", filter_tensor_, fc_param_->deep_, fc_param_->col_, quant_.input_.zp_,
                      quant_.filter_zp_, bias_ptr_, weight_bias_sums_, ColMajor, filter_per_channel_);
  }
  int stride = thread_stride_ * C4NUM;
  int res_stride = fc_param_->col_;
  int cur_oc = MSMIN(stride, res_stride);
  if (cur_oc <= 0) {
    return RET_OK;
  }
  int32_t *cur_left = quant_.left_shift_;
  int32_t *cur_right = quant_.right_shift_;
  int32_t *cur_mul = quant_.quant_multiplier_;
  int32_t *cur_zp = quant_.filter_zp_;

  code.CodeArray("cur_left_shift", cur_left, init_size_, true);
  code.CodeArray("cur_right_shift", cur_right, init_size_, true);
  code.CodeArray("cur_multiplier", cur_mul, init_size_, true);
  code.CodeArray("cur_filter_zp", cur_zp, init_size_, true);

  code.CodeFunction("MatmulInt8Opt", pack_a_ptr_, pack_b_ptr_, output_tensor_->data_c(), fc_param_->row_, cur_oc,
                    fc_param_->deep_16_, input_sums_, weight_bias_sums_, quant_.out_act_min_, quant_.out_act_max_,
                    quant_.output_.zp_, "&cur_multiplier", "&cur_left_shift", "&cur_right_shift", fc_param_->col_,
                    filter_per_channel_, "&cur_filter_zp");
  MS_LOG(DEBUG) << "FullConnectionInt8Coder has been called";
  context->AppendCode(code.str());
  return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_FullConnection,
                   CPUOpCoderCreator<FullConnectionInt8Coder>)

}  // namespace mindspore::lite::micro::nnacl

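Note: the per-channel loop in Init() converts each real multiplier (input_scale * filter_scale / output_scale) into an integer multiplier plus shifts via QuantizeRoundParameterWithDoublePrecision. A minimal sketch of the standard decomposition that such helpers implement (assumed behavior, not this commit's source):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Split real_multiplier into a Q31 fixed-point multiplier and a power-of-two
// exponent, so x * real_multiplier ~= (x * quant_multiplier) * 2^(exponent - 31).
void QuantizeMultiplier(double real_multiplier, int32_t *quant_multiplier, int *exponent) {
  const double q = std::frexp(real_multiplier, exponent);  // q in [0.5, 1)
  int64_t qm = std::llround(q * (1LL << 31));
  if (qm == (1LL << 31)) {  // rounding can push q up to exactly 1.0
    qm /= 2;
    ++*exponent;
  }
  *quant_multiplier = static_cast<int32_t>(qm);
}

int main() {
  const double real_multiplier = 0.05 * 0.02 / 0.1;  // hypothetical scales -> 0.01
  int32_t qm = 0;
  int exponent = 0;
  QuantizeMultiplier(real_multiplier, &qm, &exponent);
  std::printf("quant_multiplier=%d exponent=%d\n", qm, exponent);  // exponent < 0 means right shift
  return 0;
}
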
@@ -0,0 +1,68 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_INT8_FULLCONNECTION_INT8_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_INT8_FULLCONNECTION_INT8_CODER_H_

#include <string>
#include <memory>
#include <vector>
#include "coder/opcoders/op_coder.h"
#include "nnacl/int8/quantize.h"
#include "nnacl/matmul_parameter.h"

namespace mindspore::lite::micro::nnacl {
class FullConnectionInt8Coder final : public OperatorCoder {
 public:
  FullConnectionInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                          const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~FullConnectionInt8Coder() override;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int Init();
  int ReSize(CoderContext *const context);
  int MallocQuantParam();
  void FreeQuantParam();
  void InitParam();

 private:
  MatmulQuantParameter quant_{0};
  MatMulParameter *fc_param_{nullptr};
  Tensor *filter_tensor_{nullptr};
  Tensor *bias_tensor_{nullptr};
  size_t pack_a_ptr_size_{0};
  int8_t *pack_a_ptr_{nullptr};
  size_t pack_b_ptr_size_{0};
  int8_t *pack_b_ptr_{nullptr};
  size_t input_sums_size_{0};
  int *input_sums_{nullptr};
  size_t weight_bias_sums_size_{0};
  int *weight_bias_sums_{nullptr};
  size_t bias_ptr_size_{0};
  int *bias_ptr_{nullptr};
  int thread_count_{1};
  int thread_stride_{0};
  bool filter_per_channel_{true};
  int init_size_{0};
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_INT8_FULLCONNECTION_INT8_CODER_H_

@@ -0,0 +1,193 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "coder/opcoders/nnacl/int8/matmul_int8_coder.h"
#include <vector>
#include <string>
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
#include "coder/opcoders/file_collector.h"
namespace mindspore::lite::micro::nnacl {

int MatMulInt8Coder::ReSize(CoderContext *const context) {
  int batch = 1;
  std::vector<int> x_shape = input_tensor_->shape();
  std::vector<int> o_shape = output_tensor_->shape();
  if (x_shape.size() <= 2 || o_shape.size() <= 2) {
    MS_LOG(ERROR) << "x_shape.size() or o_shape.size() is not greater than two";
    return RET_ERROR;
  }
  for (size_t i = 0; i < x_shape.size() - 2; ++i) {
    batch *= x_shape.at(i);
  }
  params_->batch = batch;
  params_->row_ = o_shape.at(o_shape.size() - 2);
  params_->col_ = o_shape.at(o_shape.size() - 1);
  params_->deep_ = params_->a_transpose_ ? x_shape.at(x_shape.size() - 2) : x_shape.at(x_shape.size() - 1);
  params_->row_4_ = UP_ROUND(params_->row_, C4NUM);
  params_->col_4_ = UP_ROUND(params_->col_, C4NUM);
  params_->deep_16_ = UP_ROUND(params_->deep_, C16NUM);

  a_pack_ptr_size_ = static_cast<size_t>(params_->row_4_ * params_->deep_16_ * sizeof(int8_t));
  a_pack_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, a_pack_ptr_size_, kOfflinePackWeight));
  MS_CHECK_PTR(a_pack_ptr_);
  input_sums_size_ = static_cast<size_t>(params_->row_4_ * sizeof(int));
  input_sums_ = reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, input_sums_size_, kOfflinePackWeight));
  MS_CHECK_PTR(input_sums_);
  b_pack_batch_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_4_ * params_->deep_16_ * sizeof(int8_t));
  weight_bias_sums_batch_size_ = static_cast<size_t>(params_->batch * params_->col_4_ * sizeof(int));
  if (params_->b_const_) {
    b_pack_batch_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(b_pack_batch_ptr_);
    weight_bias_sums_batch_ =
      reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, kOnlineSize, kOnlinePackWeight));
  } else {
    b_pack_batch_ptr_ =
      reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, b_pack_batch_ptr_size_, kOfflinePackWeight));
    MS_CHECK_PTR(b_pack_batch_ptr_);
    weight_bias_sums_batch_ =
      reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, weight_bias_sums_batch_size_, kOfflinePackWeight));
  }
  MS_CHECK_PTR(weight_bias_sums_batch_);
  if (input_tensors_.size() == 3) {
    bias_prt_size_ = static_cast<size_t>(params_->col_4_ * sizeof(int));
    bias_ptr_ = reinterpret_cast<int *>(allocator_->Malloc(kNumberTypeInt, kOnlineSize, kOnlinePackWeight));
    MS_CHECK_PTR(bias_ptr_);
  } else {
    bias_ptr_ = nullptr;
  }
  thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_4_, C4NUM));
  thread_stride_ = UP_DIV(UP_DIV(params_->col_4_, C4NUM), thread_count_);

  std::vector<QuantArg> params = input_tensor_->quant_params();
  MS_CHECK_TRUE(!params.empty(), "params is empty");
  quant_params_.input.zp_ = params.front().zeroPoint;
  quant_params_.input.scale_ = static_cast<float>(params.front().scale);

  params = filter_tensor_->quant_params();
  MS_CHECK_TRUE(!params.empty(), "params is empty");
  quant_params_.weight.zp_ = params.front().zeroPoint;
  quant_params_.weight.scale_ = static_cast<float>(params.front().scale);

  params = output_tensor_->quant_params();
  MS_CHECK_TRUE(!params.empty(), "params is empty");
  quant_params_.output.zp_ = params.front().zeroPoint;
  quant_params_.output.scale_ = static_cast<float>(params.front().scale);
  double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_;
  QuantizeRoundParameterWithDoublePrecision(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift,
                                            &quant_params_.right_shift);
  if (params_->b_const_) {
    NNaclInt8Serializer init_code;
    if (bias_ptr_) {
      init_code.CodeMallocExpression(bias_ptr_, bias_prt_size_);
      init_code.CodeFunction("memset", bias_ptr_, 0, bias_prt_size_);
      init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_->data_c(), bias_prt_size_);
    }
    init_code.CodeMallocExpression(weight_bias_sums_batch_, weight_bias_sums_batch_size_);
    init_code.CodeFunction("memset", weight_bias_sums_batch_, 0, weight_bias_sums_batch_size_);
    init_code.CodeMallocExpression(b_pack_batch_ptr_, b_pack_batch_ptr_size_);
    init_code.CodeFunction("memset", b_pack_batch_ptr_, 0, b_pack_batch_ptr_size_);

    init_code << "int tmp_weight_zp = " << quant_params_.weight.zp_ << ";\n";
    init_code.CodeFunction("InitInt8MatrixB", filter_tensor_->data_c(), weight_bias_sums_batch_, b_pack_batch_ptr_,
                           params_->batch, params_->deep_, params_->col_, params_->col_4_, params_->deep_16_,
                           quant_params_.input.zp_, "&tmp_weight_zp", bias_ptr_, params_->b_transpose_);
    context->AppendInitCode(init_code.str());
  }
  return RET_OK;
}

int MatMulInt8Coder::Init() {
  params_ = reinterpret_cast<MatMulParameter *>(parameter_);
  filter_tensor_ = input_tensors_.at(kWeightIndex);
  MS_CHECK_PTR(filter_tensor_);
  if (input_tensors_.size() == kInputSize2) {
    bias_tensor_ = input_tensors_.at(kBiasIndex);
    MS_CHECK_PTR(bias_tensor_);
    MS_CHECK_PTR(bias_tensor_->data_c());
  }
  params_->b_const_ = (filter_tensor_->data_c() != nullptr);
  return RET_OK;
}

int MatMulInt8Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(Init(), "MatMulInt8Coder Init failed");
  MS_CHECK_RET_CODE(ReSize(context), "MatMulInt8Coder ReSize failed");
  return RET_OK;
}

int MatMulInt8Coder::DoCode(CoderContext *const context) {
  Collect(context, {"nnacl/common_func.h", "nnacl/int8/common_func_int8.h", "nnacl/int8/matmul_int8.h"},
          {"common_func.c", "common_func_int8.c", "matmul_int8.c"});

  std::string a_ptr_str = allocator_->GetRuntimeAddr(input_tensor_);
  std::string c_ptr_str = allocator_->GetRuntimeAddr(output_tensor_);
  int a_stride = params_->row_ * params_->deep_;
  int c_stride = params_->row_ * params_->col_;

  NNaclInt8Serializer code;
  code.precision(kPrecision);
  int task_id = 0;
  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_4_, C4NUM) - task_id * thread_stride_);
  if (cur_oc <= 0) {
    return RET_OK;
  }
  code << "int tmp_weight_zp = " << quant_params_.weight.zp_ << ";\n";
  if (!params_->b_const_) {
    code.CodeFunction("InitInt8MatrixB", filter_tensor_->data_c(), weight_bias_sums_batch_, b_pack_batch_ptr_,
                      params_->batch, params_->deep_, params_->col_, params_->col_4_, params_->deep_16_,
                      quant_params_.input.zp_, "&tmp_weight_zp", bias_ptr_, params_->b_transpose_);
  }
  std::string b_batch_str = allocator_->GetRuntimeAddr(b_pack_batch_ptr_);
  std::string weight_bias_sums_batch_str = allocator_->GetRuntimeAddr(weight_bias_sums_batch_);
  code.CodeFunction("memset", input_sums_, 0, input_sums_size_);
  code.CodeFunction("memset", a_pack_ptr_, 0, a_pack_ptr_size_);
  code << "for (int i = 0; i < " << params_->batch << "; ++i) {\n";
  code << "  int8_t* cur_a_ptr = " << a_ptr_str << " + i * " << a_stride << ";\n";
  if (params_->a_transpose_) {
    code.CodeFunction("RowMajor2Col16x4MajorInt8", "cur_a_ptr", params_->deep_, params_->row_, a_pack_ptr_);
    code.CodeFunction("CalcInputSums", "cur_a_ptr", params_->row_, params_->deep_, quant_params_.weight.zp_,
                      input_sums_, ColMajor);
  } else {
    code.CodeFunction("RowMajor2Row16x4MajorInt8", "cur_a_ptr", a_pack_ptr_, params_->row_, params_->deep_);
    code.CodeFunction("CalcInputSums", "cur_a_ptr", params_->row_, params_->deep_, quant_params_.weight.zp_,
                      input_sums_, RowMajor);
  }
  code << "  b_pack_ptr_ = " << b_batch_str << " + i * " << params_->col_4_ * params_->deep_16_ << ";\n";
  code << "  weight_bias_sums_ = " << weight_bias_sums_batch_str << " + i * " << params_->col_4_ << ";\n";
  code << "  c_ptr_ = " << c_ptr_str << " + i * " << c_stride << ";\n";
  int cur_oc_res = MSMIN(thread_stride_ * C4NUM, params_->col_ - task_id * thread_stride_ * C4NUM);

  code << "  int8_t* cur_b = b_pack_ptr_ + " << task_id * thread_stride_ * C4NUM * params_->deep_16_ << ";\n";
  code << "  int32_t* cur_bias = weight_bias_sums_ + " << task_id * thread_stride_ * C4NUM << ";\n";
  code << "  int8_t *cur_c = c_ptr_ + " << task_id * thread_stride_ * C4NUM << ";\n";
  code << "  static const int left_shift = " << quant_params_.left_shift << ";\n";
  code << "  static const int right_shift = " << quant_params_.right_shift << ";\n";
  code << "  static const int quant_multiplier = " << quant_params_.quant_multiplier << ";\n";
  if (target_ == kARM64) {
    code.CodeFunction("MatmulInt8Neon64", a_pack_ptr_, "cur_b", "cur_c", params_->row_4_, cur_oc * C4NUM,
                      params_->deep_16_, input_sums_, "cur_bias", INT8_MIN, INT8_MAX, quant_params_.output.zp_,
                      "&quant_multiplier", "&left_shift", "&right_shift", params_->row_, cur_oc_res, params_->col_,
                      false);
  } else {
    code.CodeFunction("MatMulInt8_16x4_r", a_pack_ptr_, "cur_b", "cur_c", params_->row_, cur_oc_res, params_->deep_16_,
                      params_->col_, input_sums_, "cur_bias", "&left_shift", "&right_shift", "&quant_multiplier",
                      quant_params_.output.zp_, INT8_MIN, INT8_MAX, false);
  }
  code << "}\n";
  MS_LOG(DEBUG) << "MatMulInt8Coder has been called";
  context->AppendCode(code.str());

  return RET_OK;
}
}  // namespace mindspore::lite::micro::nnacl

@@ -0,0 +1,57 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_INT8_MATMUL_INT8_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_INT8_MATMUL_INT8_CODER_H_
#include <vector>
#include "coder/opcoders/op_coder.h"
#include "nnacl/matmul_parameter.h"
namespace mindspore::lite::micro::nnacl {
class MatMulInt8Coder final : public OperatorCoder {
 public:
  MatMulInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}
  ~MatMulInt8Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int Init();
  int ReSize(CoderContext *const context);

 private:
  Tensor *filter_tensor_{nullptr};
  Tensor *bias_tensor_{nullptr};
  MatMulParameter *params_{nullptr};
  MatmulQuantArg quant_params_{0};
  size_t a_pack_ptr_size_{0};
  int8_t *a_pack_ptr_{nullptr};
  size_t b_pack_batch_ptr_size_{0};
  int8_t *b_pack_batch_ptr_{nullptr};
  size_t bias_prt_size_{0};
  int *bias_ptr_{nullptr};
  size_t input_sums_size_{0};
  int *input_sums_{nullptr};
  size_t weight_bias_sums_batch_size_{0};
  int *weight_bias_sums_batch_{nullptr};
  int thread_stride_{0};
  int thread_count_{0};
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_INT8_MATMUL_INT8_CODER_H_

@@ -66,6 +66,16 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const ConvParamete
                 conv_parameter.input_unit_, conv_parameter.output_unit_, conv_parameter.act_type_);
}

void NNaclFp32Serializer::CodeStruct(const std::string &name, const MatMulParameter &mat_mul_parameter) {
  CodeBaseStruct("MatMulParameter", name, mat_mul_parameter.op_parameter_, mat_mul_parameter.has_bias_,
                 mat_mul_parameter.row_, mat_mul_parameter.col_, mat_mul_parameter.row_4_, mat_mul_parameter.row_6_,
                 mat_mul_parameter.row_12_, mat_mul_parameter.row_16_, mat_mul_parameter.row_align_,
                 mat_mul_parameter.col_4_, mat_mul_parameter.col_8_, mat_mul_parameter.col_align_,
                 mat_mul_parameter.deep_, mat_mul_parameter.deep_4_, mat_mul_parameter.deep_16_,
                 mat_mul_parameter.batch, mat_mul_parameter.a_transpose_, mat_mul_parameter.b_transpose_,
                 mat_mul_parameter.a_const_, mat_mul_parameter.b_const_, mat_mul_parameter.act_type_);
}

void NNaclFp32Serializer::CodeStruct(const std::string &name, const ScaleParameter &scale_parameter) {
  CodeBaseStruct("ScaleParameter", name, scale_parameter.op_parameter_, scale_parameter.outer_size_,
                 scale_parameter.axis_size_, scale_parameter.inner_size_, scale_parameter.axis_,

@@ -116,4 +116,10 @@ void NNaclInt8Serializer::CodeStruct(const std::string &name, const ReshapeQuant
                 reshape_quant_arg.output_activation_min_, reshape_quant_arg.output_activation_max_);
}

void NNaclInt8Serializer::CodeStruct(const std::string &name, const MatmulQuantArg &matmul_quant_arg) {
  CodeBaseStruct("MatmulQuantArg", name, matmul_quant_arg.input, matmul_quant_arg.weight, matmul_quant_arg.output,
                 matmul_quant_arg.out_act_min, matmul_quant_arg.out_act_max, matmul_quant_arg.left_shift,
                 matmul_quant_arg.right_shift, matmul_quant_arg.quant_multiplier);
}

}  // namespace mindspore::lite::micro::nnacl

@@ -45,6 +45,7 @@ class NNaclInt8Serializer : public Serializer {
                  int out_shape);
  void CodeStruct(const std::string &name, const ReduceQuantArg &reduce_quant_arg);
  void CodeStruct(const std::string &name, const ReshapeQuantArg &reshape_quant_arg);
  void CodeStruct(const std::string &name, const MatmulQuantArg &matmul_quant_arg);
};

}  // namespace mindspore::lite::micro::nnacl

@@ -0,0 +1,65 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "wrapper/fp32/matmul_fp32_wrapper.h"
void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a) {
  if (is_vector_a) {
    memcpy(dst_ptr, src_ptr, params_->batch * params_->deep_ * sizeof(float));
    return;
  }
  for (int i = 0; i < params_->batch; i++) {
    const float *src = src_ptr + i * params_->deep_ * params_->row_;
#ifdef ENABLE_ARM32
    float *dst = dst_ptr + i * params_->deep_ * params_->row_4_;
    if (params_->a_transpose_) {
      RowMajor2Row4Major(src, dst, params_->deep_, params_->row_);
    } else {
      RowMajor2Col4Major(src, dst, params_->row_, params_->deep_);
    }
#else
    float *dst = dst_ptr + i * params_->deep_ * params_->row_12_;
    if (params_->a_transpose_) {
      RowMajor2Row12Major(src, dst, params_->deep_, params_->row_);
    } else {
      RowMajor2Col12Major(src, dst, params_->row_, params_->deep_);
    }
#endif
  }
}

void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a) {
  if (is_vector_a) {
    if (params_->b_transpose_) {
      memcpy(dst_ptr, src_ptr, params_->batch * params_->col_ * params_->deep_ * sizeof(float));
    } else {
      for (int i = 0; i < params_->batch; i++) {
        const float *src = src_ptr + i * params_->deep_ * params_->col_;
        float *dst = dst_ptr + i * params_->deep_ * params_->col_;
        RowMajor2ColMajor(src, dst, params_->deep_, params_->col_);
      }
    }
    return;
  }
  for (int i = 0; i < params_->batch; i++) {
    const float *src = src_ptr + i * params_->deep_ * params_->col_;
    float *dst = dst_ptr + i * params_->deep_ * params_->col_8_;
    if (params_->b_transpose_) {
      RowMajor2Col8Major(src, dst, params_->col_, params_->deep_);
    } else {
      RowMajor2Row8Major(src, dst, params_->deep_, params_->col_);
    }
  }
}

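Note: on non-ARM32 builds InitMatrixA() packs each batch of A into a row_12_ x deep_ block (row_12_ being row_ rounded up to a multiple of 12), so the caller must size the destination accordingly. A hypothetical caller, illustrative only:

#include "wrapper/fp32/matmul_fp32_wrapper.h"

int main() {
  MatMulParameter params = {};  // zero-initialize the C struct
  params.batch = 1;
  params.row_ = 5;
  params.deep_ = 16;
  params.row_12_ = 12;          // UP_ROUND(5, 12)
  params.a_transpose_ = false;
  float src[5 * 16] = {};
  float dst[12 * 16] = {};      // packed buffer padded to row_12_ rows
  InitMatrixA(src, dst, &params, false);
  return 0;
}
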
@@ -0,0 +1,33 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_ADAPTER_FP32_MATMUL_FP32_WRAPPER_H_
#define MINDSPORE_LITE_MICRO_ADAPTER_FP32_MATMUL_FP32_WRAPPER_H_
#include <string.h>
#include "nnacl/fp32/matmul_fp32.h"
#ifdef __cplusplus
extern "C" {
#endif

void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a);

void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *params_, bool is_vector_a);

#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_MICRO_ADAPTER_FP32_MATMUL_FP32_WRAPPER_H_

@@ -0,0 +1,47 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "wrapper/int8/matmul_int8_wrapper.h"

void InitInt8MatrixA(int8_t *src_ptr, int32_t *input_sums, int8_t *dst_ptr, int batch, int row, int deep, int input_zp,
                     const int *weight_zp, bool a_transpose) {
  for (int i = 0; i < batch; ++i) {
    int8_t *cur_a_ptr = src_ptr + i * row * deep;
    if (a_transpose) {
      RowMajor2Col16x4MajorInt8(cur_a_ptr, deep, row, dst_ptr);
      CalcInputSums(cur_a_ptr, row, deep, *weight_zp, input_sums, ColMajor);
    } else {
      RowMajor2Row16x4MajorInt8(cur_a_ptr, dst_ptr, row, deep);
      CalcInputSums(cur_a_ptr, row, deep, *weight_zp, input_sums, RowMajor);
    }
  }
}

void InitInt8MatrixB(int8_t *src_ptr, int32_t *weight_bias_sums_batch_, int8_t *dst_ptr, int batch, int deep, int col,
                     int col_4, int deep_16, int input_zp, int *weight_zp, const int *bias_ptr, bool b_transpose) {
  for (int i = 0; i < batch; ++i) {
    int8_t *cur_b = src_ptr + i * deep * col;
    int8_t *cur_b_pack = dst_ptr + i * col_4 * deep_16;
    int32_t *cur_sums = weight_bias_sums_batch_ + i * col_4;
    if (b_transpose) {
      RowMajor2Row16x4MajorInt8(cur_b, cur_b_pack, col, deep);
      CalcWeightBiasSums(cur_b, deep, col, input_zp, weight_zp, bias_ptr, cur_sums, ColMajor, false);
    } else {
      RowMajor2Col16x4MajorInt8(cur_b, deep, col, cur_b_pack);
      CalcWeightBiasSums(cur_b, deep, col, input_zp, weight_zp, bias_ptr, cur_sums, RowMajor, false);
    }
  }
}

@@ -0,0 +1,35 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_WRAPPER_INT8_MATMUL_INT8_WRAPPER_H_
#define MINDSPORE_LITE_MICRO_WRAPPER_INT8_MATMUL_INT8_WRAPPER_H_
#include <string.h>
#include "nnacl/int8/matmul_int8.h"
#ifdef __cplusplus
extern "C" {
#endif

void InitInt8MatrixA(int8_t *src_ptr, int32_t *input_sums, int8_t *dst_ptr, int batch, int row, int deep, int input_zp,
                     const int *weight_zp, bool a_transpose);

void InitInt8MatrixB(int8_t *src_ptr, int32_t *weight_bias_sums_batch_, int8_t *dst_ptr, int batch, int deep, int col,
                     int col_4, int deep_16, int input_zp, int *weight_zp, const int *bias_ptr, bool b_transpose);

#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_MICRO_WRAPPER_INT8_MATMUL_INT8_WRAPPER_H_