sync code of micro to master

This commit is contained in:
yangjie159 2021-03-09 11:20:42 +08:00
parent fa4c19f938
commit 4faf97f6bd
24 changed files with 262 additions and 80 deletions

View File

@ -136,6 +136,8 @@ if(PLATFORM_ARM64)
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend* ops*" EXCLUDE)
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME}
COMPONENT ${CODEGEN_COMPONENT_NAME})
if(ENABLE_TOOLS)
install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
@ -157,6 +159,8 @@ elseif(PLATFORM_ARM32)
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend*" EXCLUDE)
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME}
COMPONENT ${CODEGEN_COMPONENT_NAME})
if(ENABLE_TOOLS)
install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
@ -231,6 +235,8 @@ else()
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0
DESTINATION ${CONVERTER_PKG_NAME}/third_party/glog/lib RENAME libglog.so.0
COMPONENT ${CONVERTER_COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME}
COMPONENT ${CODEGEN_COMPONENT_NAME})
install(TARGETS codegen RUNTIME DESTINATION ${CODEGEN_PKG_NAME}/
COMPONENT ${CODEGEN_COMPONENT_NAME})
endif()
@ -249,7 +255,7 @@ else()
endif()
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
if(PLATFORM_ARM64 OR PLATFORM_ARM32)
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME})
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME})
else()
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CONVERTER_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME})
endif()

View File

@ -34,7 +34,7 @@
#define LOG_HDR_FILE_REL_PATH "mindspore/core/utils/log_adapter.h"
// Get start index of file relative path in __FILE__
static constexpr int GetRelPathPos() noexcept {
static constexpr size_t GetRelPathPos() noexcept {
return sizeof(__FILE__) > sizeof(LOG_HDR_FILE_REL_PATH) ? sizeof(__FILE__) - sizeof(LOG_HDR_FILE_REL_PATH) : 0;
}

View File

@ -89,8 +89,10 @@ if(SUPPORT_TRAIN)
else()
if(PLATFORM_ARM64)
set(RUNTIME_COMPONENT_NAME inference-android-aarch64)
set(CODEGEN_COMPONENT_NAME codegen-android-aarch64)
elseif(PLATFORM_ARM32)
set(RUNTIME_COMPONENT_NAME inference-android-aarch32)
set(CODEGEN_COMPONENT_NAME codegen-android-aarch32)
elseif(WIN32)
if("${X86_64_SIMD}" STREQUAL "off")
set(RUNTIME_COMPONENT_NAME inference-win-x64)
@ -218,7 +220,6 @@ if(ENABLE_CONVERTER)
include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
endif()
if(ENABLE_MINDRT)
@ -272,6 +273,7 @@ endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
if(ENABLE_TOOLS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
if(SUPPORT_TRAIN)

View File

@ -301,6 +301,30 @@ set(LITE_KERNEL_SRC
${LITE_DIR}/nnacl/infer/splice_infer.c
)
list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE})
#### sse
if("${X86_64_SIMD}" STREQUAL "sse")
set(SSE_SRC
${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c
${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c
${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c
)
set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C)
endif()
#### avx
if("${X86_64_SIMD}" STREQUAL "avx")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2")
set(AVX_SRC
${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c
${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c
${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c
${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c
${LITE_DIR}/nnacl/assembly/avx/MatmulAvx.S
)
set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C)
endif()
list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE} ${SSE_SRC} ${AVX_SRC})

View File

@ -25,6 +25,7 @@ include(${MICRO_DIR}/cmake/file_list.cmake)
include(${MICRO_DIR}/cmake/package_wrapper.cmake)
add_subdirectory(operator_library)
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
add_executable(codegen main.cc ${FILE_SET})
add_dependencies(codegen fbs_src)
add_dependencies(codegen fbs_inner_src)
@ -32,3 +33,4 @@ target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog)
if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH})
endif()
endif()

View File

@ -92,19 +92,17 @@ class MemoryAllocator {
* including tensor, workspace
*/
template <typename T>
std::string GetRuntimeAddr(T t, bool is_const = false) {
std::string GetRuntimeAddr(T t, bool immutable = false) {
if (!t) {
return "";
}
std::string type_info = is_const ? "const " : "";
std::string type_name;
if (std::type_index(typeid(T)) == std::type_index(typeid(Tensor *))) {
type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + "*";
} else {
type_name = GetVariableTypeName<T>();
}
type_info = wrap(type_info + type_name);
std::string type_info = wrap(type_name);
void *variable = reinterpret_cast<void *>(t);
auto item = inputs_addr_.find(variable);
if (item != inputs_addr_.end()) {
@ -133,6 +131,9 @@ class MemoryAllocator {
[&variable](const std::pair<Tensor *, std::string> &a) { return variable == a.first; });
if (iter != origin_weights_addr_.end()) {
saved_weights_addr_.insert(std::make_pair(iter->second, reinterpret_cast<Tensor *>(variable)));
if (immutable) {
malloc_weights_addr_.insert({reinterpret_cast<Tensor *>(variable), iter->second});
}
return iter->second;
}
MS_LOG(ERROR) << "uninitialized memory";

View File

@ -134,7 +134,7 @@ void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name)
<< " uint64_t timeAvg = 0;\n"
<< " int loop_count = atoi(argv[3]);\n"
<< " printf(\"======Inference Start======\\n\");\n"
<< " printf(\"cycles: %d\", loop_count);\n"
<< " printf(\"cycles: %d\\n\", loop_count);\n"
<< " for (int i = 0; i < loop_count; i++) {\n"
<< " uint64_t runBegin = GetTimeUs();\n"
<< " " << module_name << "_Inference();\n"

View File

@ -48,7 +48,7 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con
}
ofs << "file(GLOB NET_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.c)\n"
<< "add_library(${PROJ_NAME} STATIC ${NET_SRC})\n";
<< "add_library(net STATIC ${NET_SRC})\n";
}
} // namespace mindspore::lite::micro

View File

@ -19,9 +19,8 @@
const char *bench_cmake_lists_txt =
"cmake_minimum_required(VERSION 3.14)\n"
"project(${PROJ_NAME})\n"
"project(benchmark)\n"
"\n"
"message(\"project name: ${PROJ_NAME}\")\n"
"message(\"project name: ${MODEL_LIB_PATH}\")\n"
"message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n"
"\n"
@ -54,14 +53,13 @@ const char *bench_cmake_lists_txt =
"endif ()\n"
"link_directories(${MODEL_LIB_PATH})\n"
"include(benchmark.cmake)\n"
"add_executable(${PROJ_NAME}_bench ${SRC_FILES})\n"
"target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm -pthread)\n";
"add_executable(benchmark ${SRC_FILES})\n"
"target_link_libraries(benchmark ${MODEL_LIB_NAME} -lm -pthread)\n";
const char *src_cmake_lists_txt =
"cmake_minimum_required(VERSION 3.14)\n"
"project(${PROJ_NAME})\n"
"project(net)\n"
"\n"
"message(\"project name: ${PROJ_NAME}\")\n"
"message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n"
"message(\"operator lib path: ${OP_LIB}\")\n"
"message(\"operator header path: ${OP_HEADER_PATH}\")\n"
@ -83,10 +81,11 @@ const char *src_cmake_lists_txt =
"else()\n"
" set(CMAKE_C_FLAGS \"-fPIC -fPIE -O3 -Werror -fstack-protector-strong -fomit-frame-pointer ${CMAKE_C_FLAGS}\")\n"
" set(CMAKE_C_FLAGS_Release \"${CMAKE_C_FLAGS_Release} -O3 -ffunction-sections -Werror -fdata-sections\")\n"
" string(REPLACE \"-g\" \"\" CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\")\n"
"endif()\n"
"\n"
"function(create_library)\n"
" add_custom_command(TARGET ${PROJ_NAME}\n"
" add_custom_command(TARGET net\n"
" POST_BUILD\n"
" COMMAND rm -rf tmp\n"
" COMMAND mkdir tmp\n"
@ -97,9 +96,9 @@ const char *src_cmake_lists_txt =
" COMMENT \"unzip raw static library ${library_name}\"\n"
" )\n"
" foreach (object_file ${OP_SRC})\n"
" add_custom_command(TARGET ${PROJ_NAME} POST_BUILD COMMAND mv ./tmp/${object_file} .)\n"
" add_custom_command(TARGET net POST_BUILD COMMAND mv ./tmp/${object_file} .)\n"
" endforeach ()\n"
" add_custom_command(TARGET ${PROJ_NAME}\n"
" add_custom_command(TARGET net\n"
" POST_BUILD\n"
" COMMAND ar cr ${library_name} *.o\n"
" COMMAND ranlib ${library_name}\n"
@ -109,7 +108,7 @@ const char *src_cmake_lists_txt =
" COMMENT \"generate specified static library ${library_name}\"\n"
" )\n"
"endfunction(create_library)\n"
"string(CONCAT library_name \"lib\" ${PROJ_NAME} \".a\")\n"
"string(CONCAT library_name \"lib\" net \".a\")\n"
"create_library()\n";
#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_

View File

@ -36,7 +36,7 @@ void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name) {
" MICRO_ERROR(\"set global thread pool failed\");\n"
" return RET_ERROR;\n"
" }\n"
" MICRO_INFO(\"config: ThreadNum: %d, BindMode: %d\", thread_num, bind_mode);\n";
" printf(\"config: ThreadNum: %d, BindMode: %d\\n\", thread_num, bind_mode);\n";
}
void CodeDestroyThreadPool(std::ofstream &ofs) { ofs << " DestroyThreadPool(thread_pool);\n"; }

View File

@ -17,9 +17,9 @@
#include "coder/generator/component/weight_component.h"
#include <memory>
#include <utility>
#include <algorithm>
#include "coder/generator/component/const_blocks/license.h"
#include "coder/utils/coder_utils.h"
#include "coder/opcoders/parallel.h"
namespace mindspore::lite::micro {
void CodeWeightFileHeader(std::ofstream &ofs, const std::unique_ptr<CoderContext> &ctx) {
@ -89,7 +89,7 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons
<< " if (weight_buffer == NULL) {\n"
<< " return RET_ERROR;\n"
<< " }\n";
ofs << " int " << gThreadNum << " = 1;\n\n";
ofs << " struct ModelParameter {\n"
<< " void *addr;\n"
<< " size_t size;\n"

View File

@ -82,9 +82,9 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() {
MS_CHECK_PTR(params_->decoded_boxes_);
params_->nms_candidate_ = allocator_->Malloc(kNumberTypeUInt8, num_boxes_ * sizeof(uint8_t), kWorkspace);
MS_CHECK_PTR(params_->nms_candidate_);
params_->selected_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace);
params_->selected_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->selected_);
params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace);
params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->single_class_indexes_);
if (params_->use_regular_nms_) {
@ -92,13 +92,13 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() {
allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace);
MS_CHECK_PTR(params_->scores_);
params_->indexes_ =
allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->indexes_);
params_->all_class_scores_ =
allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace);
MS_CHECK_PTR(params_->all_class_scores_);
params_->all_class_indexes_ =
allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->all_class_indexes_);
} else {
params_->scores_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * sizeof(float), kWorkspace);

View File

@ -36,7 +36,7 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) {
return RET_ERROR;
}
size_t data_size = input_tensor_->ElementsNum();
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex));
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true);
Collect(ctx,
{"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h",
"nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"},

View File

@ -183,13 +183,15 @@ int Conv2DINT8Coder::Resize() {
int Conv2DINT8Coder::DoCode(CoderContext *const context) {
std::vector<std::string> asm_files;
if (target_ == kARM32A) {
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon32.S"};
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S"};
} else if (target_ == kARM64) {
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon64.S"};
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S", "MatmulDpInt8.S"};
}
Collect(context, {"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h"},
Collect(context,
{"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h",
"wrapper/base/common_wrapper.h", "wrapper/base/optimize_handler_wrapper.h"},
{"common_func.c", "pack_int8.c", "conv_int8.c", "winograd_transform.c", "matmul_int8.c", "fixed_point.c",
"convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "thread_pool.c"},
"convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "common_wrapper.c", "optimize_handler_wrapper.c"},
asm_files);
// call the op function
nnacl::NNaclInt8Serializer code;
@ -202,7 +204,6 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) {
code.CodeBaseStruct("ConvolutionInt8Args", kRunArgs, input_tensor_, packed_input_, matmul_packed_input_,
packed_weight_, bias_data_, output_tensor_, filter_zp_ptr_, input_sum_,
"(ConvParameter *)&conv_param", matmul_func_, support_optimize_);
code.CodeFunction("CheckSupportOptimize", kRunArgsAddr);
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, gThreadPool, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum);
} else {

View File

@ -44,10 +44,8 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder {
}
private:
int InitWeightBias(CoderContext *ctx);
void CheckSupportOptimize();
int InitWeightBias(CoderContext *ctx);
int InitTmpBuffer(CoderContext *ctx);
int Resize();
@ -70,7 +68,7 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder {
int32_t *input_sum_{nullptr};
int8_t *matmul_packed_input_{nullptr};
std::string matmul_func_;
std::string matmul_func_{"NULL"};
std::function<int(nnacl::NNaclInt8Serializer &, const std::string &, const std::string &)> pack_weight_init_{nullptr};
};

View File

@ -168,9 +168,13 @@ class Serializer {
* "int pointer_gen[4] = {1 ,3, 2, 42};\n
* const Foo foo_gen = {{1, 2, 3}, pointer_gen, 4};\n"
*/
template <typename... PARAMETERS>
template <bool immutable = true, typename... PARAMETERS>
void CodeBaseStruct(const std::string &type, const std::string &name, PARAMETERS... parameters) {
if constexpr (immutable) {
code << "const " << type << " " << name << " = {";
} else {
code << type << " " << name << " = {";
}
GenCode(parameters...);
code << "};\n";
}

View File

@ -22,7 +22,6 @@ endif()
set(MICRO_CMAKE_PATH ${MICRO_DIR}/cmake)
set(OPERATOR_LIBRARY_PATH ${CMAKE_BINARY_DIR}/operator_library)
set(HEADER_PATH "${OPERATOR_LIBRARY_PATH}/include")
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86")
message("===========>start to pack operators' head file")
file(REMOVE_RECURSE ${OPERATOR_LIBRARY_PATH})
@ -36,14 +35,31 @@ file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/assembly)
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16)
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16_grad)
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp32_grad)
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/intrinsics)
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/optimize)
if(PLATFORM_ARM64)
set(MICRO_BUILD_ARM64 ON)
endif()
if(PLATFORM_ARM32)
set(MICRO_BUILD_ARM32A ON)
endif()
include(${MICRO_CMAKE_PATH}/package_android.cmake)
include(${MICRO_CMAKE_PATH}/package_nnacl.cmake)
include(${MICRO_CMAKE_PATH}/package_cmsis.cmake)
include(${MICRO_CMAKE_PATH}/package_wrapper.cmake)
list(APPEND OP_FILES ${NNACL_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC})
if(PLATFORM_ARM64)
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm64")
elseif(PLATFORM_ARM32)
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm32a")
else()
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86")
list(APPEND OP_FILES ${CMSIS_OPS})
endif()
# generate static library
add_library(ops STATIC ${NNACL_OPS} ${CMSIS_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC})
add_library(ops STATIC ${OP_FILES})
install(TARGETS ops ARCHIVE DESTINATION ${LIB_PATH})

View File

@ -0,0 +1,36 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "wrapper/base/common_wrapper.h"
#ifdef __ANDROID__
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
/*
 * Reports whether the optimized int8 kernels that rely on the Advanced SIMD
 * dot-product instructions may be used on the running CPU.
 *
 * Returns true only when the build targets ARM64 Android and the hardware
 * capability word reported by the kernel has HWCAP_ASIMDDP set; returns false
 * in every other configuration.
 */
bool GetSupportOptFlag(void) {
  bool status = false;
#if defined(ENABLE_ARM64) && defined(__ANDROID__)
  /*
   * <sys/auxv.h> and <asm/hwcap.h> are only included under __ANDROID__, so the
   * probe is guarded by the same macro; otherwise an ARM64 non-Android build
   * would reference getauxval/HWCAP_ASIMDDP without a declaration.
   */
  /* getauxval returns unsigned long; keep the full width instead of narrowing. */
  const unsigned long hwcap = getauxval(AT_HWCAP);
  status = (hwcap & HWCAP_ASIMDDP) != 0;
#endif
  return status;
}

View File

@ -0,0 +1,24 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_
#include "nnacl/op_base.h"
/*
 * Runtime probe used to decide whether the optimized int8 kernels can be used.
 * Returns false unless the library was built with ENABLE_ARM64 and the CPU's
 * hardware-capability bits include HWCAP_ASIMDDP.
 */
bool GetSupportOptFlag();
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_

View File

@ -0,0 +1,49 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "wrapper/base/optimize_handler_wrapper.h"
/*
 * Low-level int8 matmul kernels that the handlers in this file forward to.
 * They are only declared here; their definitions live outside this file
 * (presumably the ARM64 assembly sources - TODO confirm).
 */

/* Kernel called by MatMulR4Int8_optimize_handler; writes int results to dst. */
extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
const int *input_sum, const int *bias);

/*
 * Kernel called by MatMulRInt8_optimize_handler. Takes both the rounded-up
 * sizes (row8/col8) and the original row/col, all as int.
 */
extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
size_t peroc);

/* Kernel called by MatMulDpInt8_optimize_handler; additionally receives filter_zp. */
extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);
#ifdef ENABLE_ARM64
/*
 * Adapter that forwards its arguments, unchanged and in the same order, to the
 * MatMulOptR4Int8Neon64 kernel. The result is written through dst.
 */
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
                                   const int *input_sum, const int *bias) {
  /* A void function must not `return` an expression in ISO C; call the kernel as a plain statement. */
  MatMulOptR4Int8Neon64(a, b, dst, row4, col4, deep16, input_sum, bias);
}
/*
 * Adapter that reorders its arguments into the layout expected by the
 * MatmulInt8DpNeon64 kernel and forwards the call.
 *
 * row and col are passed twice: once rounded up with UP_ROUND(..., C8NUM) and
 * once as given. The kernel takes these sizes as int, so the size_t inputs are
 * converted explicitly here instead of relying on silent narrowing at the call.
 */
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
                                  int32_t maxi, size_t per_channel) {
  const int row8 = (int)UP_ROUND(row, C8NUM);
  const int col8 = (int)UP_ROUND(col, C8NUM);
  /* A void function must not `return` an expression in ISO C; call the kernel as a plain statement. */
  MatmulInt8DpNeon64(a, b, dst, row8, col8, (int)deep_4, input_sum, bias, mini, maxi, output_zp, multiplier,
                     left_shift, right_shift, (int)row, (int)col, (int)stride, per_channel);
}
/*
 * Adapter that reorders its arguments into the layout expected by the
 * MatmulInt8DpOpt kernel and forwards the call. row and col are passed through
 * as given (no rounding is applied here), and filter_zp is forwarded last.
 */
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
                                   int32_t maxi, size_t per_channel, int32_t *filter_zp) {
  /* A void function must not `return` an expression in ISO C; call the kernel as a plain statement. */
  MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
                  right_shift, stride, per_channel, filter_zp);
}
#endif

View File

@ -0,0 +1,41 @@
/*
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_
#include "nnacl/op_base.h"
/* The optimize handlers are only declared (and defined) for ARM64 builds. */
#ifdef ENABLE_ARM64
/*
 * NOTE(review): no definition of this handler is visible in
 * optimize_handler_wrapper.c - confirm it is provided elsewhere, otherwise
 * any caller will fail at link time.
 */
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
size_t ksize, size_t ic4, size_t output_channel, size_t offset,
const int32_t *input_sum, size_t act_min, size_t act_max, size_t out_zp,
int32_t *out_multiplier, int32_t *shift_before, int32_t *shift_after,
size_t asymmetric, size_t per_channel, size_t per_channel_offset);

/* Forwards all arguments unchanged to the MatMulOptR4Int8Neon64 kernel. */
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
const int *input_sum, const int *bias);

/*
 * Forwards to the MatmulInt8DpNeon64 kernel, passing row/col both rounded up
 * with UP_ROUND(..., C8NUM) and as given.
 */
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel);

/* Forwards to the MatmulInt8DpOpt kernel; same as above plus a trailing filter_zp argument. */
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
int32_t maxi, size_t per_channel, int32_t *filter_zp);
#endif
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_

View File

@ -22,21 +22,12 @@ void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa
}
for (int i = 0; i < params_->batch; i++) {
const float *src = src_ptr + i * params_->deep_ * params_->row_;
#ifdef ENABLE_ARM32
float *dst = dst_ptr + i * params_->deep_ * params_->row_4_;
if (params_->a_transpose_) {
RowMajor2Row4Major(src, dst, params_->deep_, params_->row_);
} else {
RowMajor2Col4Major(src, dst, params_->row_, params_->deep_);
}
#else
float *dst = dst_ptr + i * params_->deep_ * params_->row_12_;
float *dst = dst_ptr + i * params_->deep_ * params_->row_align_;
if (params_->a_transpose_) {
RowMajor2Row12Major(src, dst, params_->deep_, params_->row_);
} else {
RowMajor2Col12Major(src, dst, params_->row_, params_->deep_);
}
#endif
}
}
@ -55,11 +46,19 @@ void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa
}
for (int i = 0; i < params_->batch; i++) {
const float *src = src_ptr + i * params_->deep_ * params_->col_;
float *dst = dst_ptr + i * params_->deep_ * params_->col_8_;
float *dst = dst_ptr + i * params_->deep_ * params_->col_align_;
#ifdef ENABLE_ARM32
if (params_->b_transpose_) {
RowMajor2Col4Major(src, dst, params_->col_, params_->deep_);
} else {
RowMajor2Row4Major(src, dst, params_->deep_, params_->col_);
}
#else
if (params_->b_transpose_) {
RowMajor2Col8Major(src, dst, params_->col_, params_->deep_);
} else {
RowMajor2Row8Major(src, dst, params_->deep_, params_->col_);
}
#endif
}
}

View File

@ -16,24 +16,6 @@
#include "wrapper/int8/convolution_int8_wrapper.h"
void CheckSupportOptimize(const ConvolutionInt8Args *args) {
int tile_num = 8;
#ifdef ENABLE_ARM32
tile_num = 4;
args->is_optimize_ = false;
#endif
#ifdef ENABLE_ARM64
if (mindspore::lite::IsSupportSDot()) {
matmul_func_ = MatMulRInt8_optimize_handler;
args->is_optimize_ = true;
} else {
tile_num = 4;
args->is_optimize_ = false;
}
#endif
args->conv_param_->tile_num_ = tile_num;
}
int ConvolutionInt8Run(void *cdata, int task_id) {
ConvolutionInt8Args *args = (ConvolutionInt8Args *)cdata;
ConvInt8(args->input_data_, args->packed_input_, args->matmul_input_, args->packed_weight_, args->bias_data_,

View File

@ -36,8 +36,6 @@ typedef struct {
bool is_optimize_;
} ConvolutionInt8Args;
void CheckSupportOptimize(const ConvolutionInt8Args *args);
int ConvolutionInt8Run(void *cdata, int task_id);
#endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_WRAPPER_INT8_WRAPPER_H_