forked from mindspore-Ecosystem/mindspore
sync code of micro to master
This commit is contained in:
parent
fa4c19f938
commit
4faf97f6bd
|
@ -136,6 +136,8 @@ if(PLATFORM_ARM64)
|
|||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
# Install the public API headers (*.h) for the runtime package.
# Every excluded name needs its own `PATTERN ... EXCLUDE` clause: EXCLUDE only
# applies to the PATTERN immediately before it, and a single glob such as
# "ascend* ops*" contains a literal space, so it matched neither group.
# NOTE(review): this now really excludes both ascend* and ops* entries -
# confirm that the ops* headers are not meant to ship in this package.
install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api
        COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h"
        PATTERN "ascend*" EXCLUDE PATTERN "ops*" EXCLUDE)
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME}
|
||||
COMPONENT ${CODEGEN_COMPONENT_NAME})
|
||||
if(ENABLE_TOOLS)
|
||||
install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
|
@ -157,6 +159,8 @@ elseif(PLATFORM_ARM32)
|
|||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend*" EXCLUDE)
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME}
|
||||
COMPONENT ${CODEGEN_COMPONENT_NAME})
|
||||
if(ENABLE_TOOLS)
|
||||
install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
|
@ -231,6 +235,8 @@ else()
|
|||
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0
|
||||
DESTINATION ${CONVERTER_PKG_NAME}/third_party/glog/lib RENAME libglog.so.0
|
||||
COMPONENT ${CONVERTER_COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME}
|
||||
COMPONENT ${CODEGEN_COMPONENT_NAME})
|
||||
install(TARGETS codegen RUNTIME DESTINATION ${CODEGEN_PKG_NAME}/
|
||||
COMPONENT ${CODEGEN_COMPONENT_NAME})
|
||||
endif()
|
||||
|
@ -249,7 +255,7 @@ else()
|
|||
endif()
|
||||
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
|
||||
if(PLATFORM_ARM64 OR PLATFORM_ARM32)
|
||||
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME})
|
||||
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME})
|
||||
else()
|
||||
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CONVERTER_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME})
|
||||
endif()
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
#define LOG_HDR_FILE_REL_PATH "mindspore/core/utils/log_adapter.h"
|
||||
|
||||
// Get start index of file relative path in __FILE__
|
||||
static constexpr int GetRelPathPos() noexcept {
|
||||
static constexpr size_t GetRelPathPos() noexcept {
|
||||
return sizeof(__FILE__) > sizeof(LOG_HDR_FILE_REL_PATH) ? sizeof(__FILE__) - sizeof(LOG_HDR_FILE_REL_PATH) : 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -89,8 +89,10 @@ if(SUPPORT_TRAIN)
|
|||
else()
|
||||
if(PLATFORM_ARM64)
|
||||
set(RUNTIME_COMPONENT_NAME inference-android-aarch64)
|
||||
set(CODEGEN_COMPONENT_NAME codegen-android-aarch64)
|
||||
elseif(PLATFORM_ARM32)
|
||||
set(RUNTIME_COMPONENT_NAME inference-android-aarch32)
|
||||
set(CODEGEN_COMPONENT_NAME codegen-android-aarch32)
|
||||
elseif(WIN32)
|
||||
if("${X86_64_SIMD}" STREQUAL "off")
|
||||
set(RUNTIME_COMPONENT_NAME inference-win-x64)
|
||||
|
@ -218,7 +220,6 @@ if(ENABLE_CONVERTER)
|
|||
include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
|
||||
endif()
|
||||
|
||||
if(ENABLE_MINDRT)
|
||||
|
@ -272,6 +273,7 @@ endif()
|
|||
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
|
||||
if(ENABLE_TOOLS)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
|
||||
if(SUPPORT_TRAIN)
|
||||
|
|
|
@ -301,6 +301,30 @@ set(LITE_KERNEL_SRC
|
|||
${LITE_DIR}/nnacl/infer/splice_infer.c
|
||||
)
|
||||
|
||||
list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
|
||||
${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE})
|
||||
#### sse
|
||||
if("${X86_64_SIMD}" STREQUAL "sse")
|
||||
set(SSE_SRC
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c
|
||||
)
|
||||
set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
#### avx
|
||||
if("${X86_64_SIMD}" STREQUAL "avx")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2")
|
||||
set(AVX_SRC
|
||||
${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c
|
||||
${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c
|
||||
${LITE_DIR}/nnacl/assembly/avx/MatmulAvx.S
|
||||
)
|
||||
set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
|
||||
${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE} ${SSE_SRC} ${AVX_SRC})
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ include(${MICRO_DIR}/cmake/file_list.cmake)
|
|||
include(${MICRO_DIR}/cmake/package_wrapper.cmake)
|
||||
add_subdirectory(operator_library)
|
||||
|
||||
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||
add_executable(codegen main.cc ${FILE_SET})
|
||||
add_dependencies(codegen fbs_src)
|
||||
add_dependencies(codegen fbs_inner_src)
|
||||
|
@ -32,3 +33,4 @@ target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog)
|
|||
if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH})
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -92,19 +92,17 @@ class MemoryAllocator {
|
|||
* including tensor, workspace
|
||||
*/
|
||||
template <typename T>
|
||||
std::string GetRuntimeAddr(T t, bool is_const = false) {
|
||||
std::string GetRuntimeAddr(T t, bool immutable = false) {
|
||||
if (!t) {
|
||||
return "";
|
||||
}
|
||||
std::string type_info = is_const ? "const " : "";
|
||||
std::string type_name;
|
||||
if (std::type_index(typeid(T)) == std::type_index(typeid(Tensor *))) {
|
||||
type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + "*";
|
||||
} else {
|
||||
type_name = GetVariableTypeName<T>();
|
||||
}
|
||||
type_info = wrap(type_info + type_name);
|
||||
|
||||
std::string type_info = wrap(type_name);
|
||||
void *variable = reinterpret_cast<void *>(t);
|
||||
auto item = inputs_addr_.find(variable);
|
||||
if (item != inputs_addr_.end()) {
|
||||
|
@ -133,6 +131,9 @@ class MemoryAllocator {
|
|||
[&variable](const std::pair<Tensor *, std::string> &a) { return variable == a.first; });
|
||||
if (iter != origin_weights_addr_.end()) {
|
||||
saved_weights_addr_.insert(std::make_pair(iter->second, reinterpret_cast<Tensor *>(variable)));
|
||||
if (immutable) {
|
||||
malloc_weights_addr_.insert({reinterpret_cast<Tensor *>(variable), iter->second});
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
MS_LOG(ERROR) << "uninitialized memory";
|
||||
|
|
|
@ -134,7 +134,7 @@ void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name)
|
|||
<< " uint64_t timeAvg = 0;\n"
|
||||
<< " int loop_count = atoi(argv[3]);\n"
|
||||
<< " printf(\"======Inference Start======\\n\");\n"
|
||||
<< " printf(\"cycles: %d\", loop_count);\n"
|
||||
<< " printf(\"cycles: %d\\n\", loop_count);\n"
|
||||
<< " for (int i = 0; i < loop_count; i++) {\n"
|
||||
<< " uint64_t runBegin = GetTimeUs();\n"
|
||||
<< " " << module_name << "_Inference();\n"
|
||||
|
|
|
@ -48,7 +48,7 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con
|
|||
}
|
||||
|
||||
ofs << "file(GLOB NET_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.c)\n"
|
||||
<< "add_library(${PROJ_NAME} STATIC ${NET_SRC})\n";
|
||||
<< "add_library(net STATIC ${NET_SRC})\n";
|
||||
}
|
||||
|
||||
} // namespace mindspore::lite::micro
|
||||
|
|
|
@ -19,9 +19,8 @@
|
|||
|
||||
const char *bench_cmake_lists_txt =
|
||||
"cmake_minimum_required(VERSION 3.14)\n"
|
||||
"project(${PROJ_NAME})\n"
|
||||
"project(benchmark)\n"
|
||||
"\n"
|
||||
"message(\"project name: ${PROJ_NAME}\")\n"
|
||||
"message(\"project name: ${MODEL_LIB_PATH}\")\n"
|
||||
"message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n"
|
||||
"\n"
|
||||
|
@ -54,14 +53,13 @@ const char *bench_cmake_lists_txt =
|
|||
"endif ()\n"
|
||||
"link_directories(${MODEL_LIB_PATH})\n"
|
||||
"include(benchmark.cmake)\n"
|
||||
"add_executable(${PROJ_NAME}_bench ${SRC_FILES})\n"
|
||||
"target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm -pthread)\n";
|
||||
"add_executable(benchmark ${SRC_FILES})\n"
|
||||
"target_link_libraries(benchmark ${MODEL_LIB_NAME} -lm -pthread)\n";
|
||||
|
||||
const char *src_cmake_lists_txt =
|
||||
"cmake_minimum_required(VERSION 3.14)\n"
|
||||
"project(${PROJ_NAME})\n"
|
||||
"project(net)\n"
|
||||
"\n"
|
||||
"message(\"project name: ${PROJ_NAME}\")\n"
|
||||
"message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n"
|
||||
"message(\"operator lib path: ${OP_LIB}\")\n"
|
||||
"message(\"operator header path: ${OP_HEADER_PATH}\")\n"
|
||||
|
@ -83,10 +81,11 @@ const char *src_cmake_lists_txt =
|
|||
"else()\n"
|
||||
" set(CMAKE_C_FLAGS \"-fPIC -fPIE -O3 -Werror -fstack-protector-strong -fomit-frame-pointer ${CMAKE_C_FLAGS}\")\n"
|
||||
" set(CMAKE_C_FLAGS_Release \"${CMAKE_C_FLAGS_Release} -O3 -ffunction-sections -Werror -fdata-sections\")\n"
|
||||
" string(REPLACE \"-g\" \"\" CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\")\n"
|
||||
"endif()\n"
|
||||
"\n"
|
||||
"function(create_library)\n"
|
||||
" add_custom_command(TARGET ${PROJ_NAME}\n"
|
||||
" add_custom_command(TARGET net\n"
|
||||
" POST_BUILD\n"
|
||||
" COMMAND rm -rf tmp\n"
|
||||
" COMMAND mkdir tmp\n"
|
||||
|
@ -97,9 +96,9 @@ const char *src_cmake_lists_txt =
|
|||
" COMMENT \"unzip raw static library ${library_name}\"\n"
|
||||
" )\n"
|
||||
" foreach (object_file ${OP_SRC})\n"
|
||||
" add_custom_command(TARGET ${PROJ_NAME} POST_BUILD COMMAND mv ./tmp/${object_file} .)\n"
|
||||
" add_custom_command(TARGET net POST_BUILD COMMAND mv ./tmp/${object_file} .)\n"
|
||||
" endforeach ()\n"
|
||||
" add_custom_command(TARGET ${PROJ_NAME}\n"
|
||||
" add_custom_command(TARGET net\n"
|
||||
" POST_BUILD\n"
|
||||
" COMMAND ar cr ${library_name} *.o\n"
|
||||
" COMMAND ranlib ${library_name}\n"
|
||||
|
@ -109,7 +108,7 @@ const char *src_cmake_lists_txt =
|
|||
" COMMENT \"generate specified static library ${library_name}\"\n"
|
||||
" )\n"
|
||||
"endfunction(create_library)\n"
|
||||
"string(CONCAT library_name \"lib\" ${PROJ_NAME} \".a\")\n"
|
||||
"string(CONCAT library_name \"lib\" net \".a\")\n"
|
||||
"create_library()\n";
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_
|
||||
|
|
|
@ -36,7 +36,7 @@ void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name) {
|
|||
" MICRO_ERROR(\"set global thread pool failed\");\n"
|
||||
" return RET_ERROR;\n"
|
||||
" }\n"
|
||||
" MICRO_INFO(\"config: ThreadNum: %d, BindMode: %d\", thread_num, bind_mode);\n";
|
||||
" printf(\"config: ThreadNum: %d, BindMode: %d\\n\", thread_num, bind_mode);\n";
|
||||
}
|
||||
|
||||
void CodeDestroyThreadPool(std::ofstream &ofs) { ofs << " DestroyThreadPool(thread_pool);\n"; }
|
||||
|
|
|
@ -17,9 +17,9 @@
|
|||
#include "coder/generator/component/weight_component.h"
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include "coder/generator/component/const_blocks/license.h"
|
||||
#include "coder/utils/coder_utils.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
|
||||
namespace mindspore::lite::micro {
|
||||
void CodeWeightFileHeader(std::ofstream &ofs, const std::unique_ptr<CoderContext> &ctx) {
|
||||
|
@ -89,7 +89,7 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons
|
|||
<< " if (weight_buffer == NULL) {\n"
|
||||
<< " return RET_ERROR;\n"
|
||||
<< " }\n";
|
||||
|
||||
ofs << " int " << gThreadNum << " = 1;\n\n";
|
||||
ofs << " struct ModelParameter {\n"
|
||||
<< " void *addr;\n"
|
||||
<< " size_t size;\n"
|
||||
|
|
|
@ -82,9 +82,9 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() {
|
|||
MS_CHECK_PTR(params_->decoded_boxes_);
|
||||
params_->nms_candidate_ = allocator_->Malloc(kNumberTypeUInt8, num_boxes_ * sizeof(uint8_t), kWorkspace);
|
||||
MS_CHECK_PTR(params_->nms_candidate_);
|
||||
params_->selected_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace);
|
||||
params_->selected_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace);
|
||||
MS_CHECK_PTR(params_->selected_);
|
||||
params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace);
|
||||
params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace);
|
||||
MS_CHECK_PTR(params_->single_class_indexes_);
|
||||
|
||||
if (params_->use_regular_nms_) {
|
||||
|
@ -92,13 +92,13 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() {
|
|||
allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace);
|
||||
MS_CHECK_PTR(params_->scores_);
|
||||
params_->indexes_ =
|
||||
allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
|
||||
allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
|
||||
MS_CHECK_PTR(params_->indexes_);
|
||||
params_->all_class_scores_ =
|
||||
allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace);
|
||||
MS_CHECK_PTR(params_->all_class_scores_);
|
||||
params_->all_class_indexes_ =
|
||||
allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
|
||||
allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
|
||||
MS_CHECK_PTR(params_->all_class_indexes_);
|
||||
} else {
|
||||
params_->scores_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * sizeof(float), kWorkspace);
|
||||
|
|
|
@ -36,7 +36,7 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
size_t data_size = input_tensor_->ElementsNum();
|
||||
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex));
|
||||
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true);
|
||||
Collect(ctx,
|
||||
{"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h",
|
||||
"nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"},
|
||||
|
|
|
@ -183,13 +183,15 @@ int Conv2DINT8Coder::Resize() {
|
|||
int Conv2DINT8Coder::DoCode(CoderContext *const context) {
|
||||
std::vector<std::string> asm_files;
|
||||
if (target_ == kARM32A) {
|
||||
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon32.S"};
|
||||
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S"};
|
||||
} else if (target_ == kARM64) {
|
||||
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon64.S"};
|
||||
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S", "MatmulDpInt8.S"};
|
||||
}
|
||||
Collect(context, {"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h"},
|
||||
Collect(context,
|
||||
{"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h",
|
||||
"wrapper/base/common_wrapper.h", "wrapper/base/optimize_handler_wrapper.h"},
|
||||
{"common_func.c", "pack_int8.c", "conv_int8.c", "winograd_transform.c", "matmul_int8.c", "fixed_point.c",
|
||||
"convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "thread_pool.c"},
|
||||
"convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "common_wrapper.c", "optimize_handler_wrapper.c"},
|
||||
asm_files);
|
||||
// call the op function
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
|
@ -202,7 +204,6 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeBaseStruct("ConvolutionInt8Args", kRunArgs, input_tensor_, packed_input_, matmul_packed_input_,
|
||||
packed_weight_, bias_data_, output_tensor_, filter_zp_ptr_, input_sum_,
|
||||
"(ConvParameter *)&conv_param", matmul_func_, support_optimize_);
|
||||
code.CodeFunction("CheckSupportOptimize", kRunArgsAddr);
|
||||
if (support_parallel_) {
|
||||
code.CodeFunction(kParallelLaunch, gThreadPool, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum);
|
||||
} else {
|
||||
|
|
|
@ -44,10 +44,8 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder {
|
|||
}
|
||||
|
||||
private:
|
||||
int InitWeightBias(CoderContext *ctx);
|
||||
|
||||
void CheckSupportOptimize();
|
||||
|
||||
int InitWeightBias(CoderContext *ctx);
|
||||
int InitTmpBuffer(CoderContext *ctx);
|
||||
|
||||
int Resize();
|
||||
|
@ -70,7 +68,7 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder {
|
|||
int32_t *input_sum_{nullptr};
|
||||
int8_t *matmul_packed_input_{nullptr};
|
||||
|
||||
std::string matmul_func_;
|
||||
std::string matmul_func_{"NULL"};
|
||||
|
||||
std::function<int(nnacl::NNaclInt8Serializer &, const std::string &, const std::string &)> pack_weight_init_{nullptr};
|
||||
};
|
||||
|
|
|
@ -168,9 +168,13 @@ class Serializer {
|
|||
* "int pointer_gen[4] = {1 ,3, 2, 42};\n
|
||||
* const Foo foo_gen = {{1, 2, 3}, pointer_gen, 4};\n"
|
||||
*/
|
||||
template <typename... PARAMETERS>
|
||||
template <bool immutable = true, typename... PARAMETERS>
|
||||
void CodeBaseStruct(const std::string &type, const std::string &name, PARAMETERS... parameters) {
|
||||
if constexpr (immutable) {
|
||||
code << "const " << type << " " << name << " = {";
|
||||
} else {
|
||||
code << type << " " << name << " = {";
|
||||
}
|
||||
GenCode(parameters...);
|
||||
code << "};\n";
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ endif()
|
|||
set(MICRO_CMAKE_PATH ${MICRO_DIR}/cmake)
|
||||
set(OPERATOR_LIBRARY_PATH ${CMAKE_BINARY_DIR}/operator_library)
|
||||
set(HEADER_PATH "${OPERATOR_LIBRARY_PATH}/include")
|
||||
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86")
|
||||
|
||||
message("===========>start to pack operators' head file")
|
||||
file(REMOVE_RECURSE ${OPERATOR_LIBRARY_PATH})
|
||||
|
@ -36,14 +35,31 @@ file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/assembly)
|
|||
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16)
|
||||
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16_grad)
|
||||
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp32_grad)
|
||||
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/intrinsics)
|
||||
file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/optimize)
|
||||
|
||||
if(PLATFORM_ARM64)
|
||||
set(MICRO_BUILD_ARM64 ON)
|
||||
endif()
|
||||
if(PLATFORM_ARM32)
|
||||
set(MICRO_BUILD_ARM32A ON)
|
||||
endif()
|
||||
|
||||
include(${MICRO_CMAKE_PATH}/package_android.cmake)
|
||||
include(${MICRO_CMAKE_PATH}/package_nnacl.cmake)
|
||||
include(${MICRO_CMAKE_PATH}/package_cmsis.cmake)
|
||||
include(${MICRO_CMAKE_PATH}/package_wrapper.cmake)
|
||||
|
||||
list(APPEND OP_FILES ${NNACL_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC})
|
||||
|
||||
if(PLATFORM_ARM64)
|
||||
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm64")
|
||||
elseif(PLATFORM_ARM32)
|
||||
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm32a")
|
||||
else()
|
||||
set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86")
|
||||
list(APPEND OP_FILES ${CMSIS_OPS})
|
||||
endif()
|
||||
|
||||
# generate static library
|
||||
add_library(ops STATIC ${NNACL_OPS} ${CMSIS_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC})
|
||||
add_library(ops STATIC ${OP_FILES})
|
||||
install(TARGETS ops ARCHIVE DESTINATION ${LIB_PATH})
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "wrapper/base/common_wrapper.h"
|
||||
#ifdef __ANDROID__
|
||||
#include <sys/auxv.h>
|
||||
#include <asm/hwcap.h>
|
||||
#endif
|
||||
|
||||
/*
 * Reports whether the CPU advertises the HWCAP_ASIMDDP capability bit.
 *
 * The hardware-capability word is read with getauxval(AT_HWCAP). The query is
 * compiled only when both ENABLE_ARM64 and __ANDROID__ are defined, because
 * <sys/auxv.h> and <asm/hwcap.h> are included under __ANDROID__ only; every
 * other configuration reports false.
 *
 * Returns true when HWCAP_ASIMDDP is set, false otherwise.
 */
bool GetSupportOptFlag(void) {
#if defined(ENABLE_ARM64) && defined(__ANDROID__)
  /* getauxval() returns unsigned long; keep the full width instead of narrowing to 32 bits. */
  unsigned long hwcap = getauxval(AT_HWCAP);
  return (hwcap & HWCAP_ASIMDDP) != 0;
#else
  return false;
#endif
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_
|
||||
#define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_
|
||||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
bool GetSupportOptFlag();
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "wrapper/base/optimize_handler_wrapper.h"
|
||||
|
||||
extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
|
||||
const int *input_sum, const int *bias);
|
||||
extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
|
||||
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
|
||||
int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
|
||||
size_t peroc);
|
||||
extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
|
||||
const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
|
||||
int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
/*
 * Thin forwarding wrapper: passes every argument unchanged to
 * MatMulOptR4Int8Neon64, which is declared extern at the top of this file.
 * The callee returns void, so it is invoked as a plain statement; ISO C does
 * not allow `return expr;` inside a function whose return type is void.
 */
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
                                   const int *input_sum, const int *bias) {
  MatMulOptR4Int8Neon64(a, b, dst, row4, col4, deep16, input_sum, bias);
}
|
||||
|
||||
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
|
||||
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
|
||||
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
|
||||
int32_t maxi, size_t per_channel) {
|
||||
return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
|
||||
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
|
||||
}
|
||||
/*
 * Adapts this handler's argument order to MatmulInt8DpOpt.
 *
 * row, col and deep_4 are forwarded unrounded; the remaining arguments are
 * reordered to match the callee's prototype, with filter_zp passed last.
 * The callee returns void, so it is invoked as a plain statement; ISO C does
 * not allow `return expr;` inside a function whose return type is void.
 */
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
                                   int32_t maxi, size_t per_channel, int32_t *filter_zp) {
  MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
                  right_shift, stride, per_channel, filter_zp);
}
|
||||
#endif
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_
|
||||
#define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_
|
||||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
|
||||
size_t ksize, size_t ic4, size_t output_channel, size_t offset,
|
||||
const int32_t *input_sum, size_t act_min, size_t act_max, size_t out_zp,
|
||||
int32_t *out_multiplier, int32_t *shift_before, int32_t *shift_after,
|
||||
size_t asymmetric, size_t per_channel, size_t per_channel_offset);
|
||||
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
|
||||
const int *input_sum, const int *bias);
|
||||
|
||||
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
|
||||
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
|
||||
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
|
||||
int32_t maxi, size_t per_channel);
|
||||
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
|
||||
size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
|
||||
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
|
||||
int32_t maxi, size_t per_channel, int32_t *filter_zp);
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_
|
|
@ -22,21 +22,12 @@ void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa
|
|||
}
|
||||
for (int i = 0; i < params_->batch; i++) {
|
||||
const float *src = src_ptr + i * params_->deep_ * params_->row_;
|
||||
#ifdef ENABLE_ARM32
|
||||
float *dst = dst_ptr + i * params_->deep_ * params_->row_4_;
|
||||
if (params_->a_transpose_) {
|
||||
RowMajor2Row4Major(src, dst, params_->deep_, params_->row_);
|
||||
} else {
|
||||
RowMajor2Col4Major(src, dst, params_->row_, params_->deep_);
|
||||
}
|
||||
#else
|
||||
float *dst = dst_ptr + i * params_->deep_ * params_->row_12_;
|
||||
float *dst = dst_ptr + i * params_->deep_ * params_->row_align_;
|
||||
if (params_->a_transpose_) {
|
||||
RowMajor2Row12Major(src, dst, params_->deep_, params_->row_);
|
||||
} else {
|
||||
RowMajor2Col12Major(src, dst, params_->row_, params_->deep_);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -55,11 +46,19 @@ void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa
|
|||
}
|
||||
for (int i = 0; i < params_->batch; i++) {
|
||||
const float *src = src_ptr + i * params_->deep_ * params_->col_;
|
||||
float *dst = dst_ptr + i * params_->deep_ * params_->col_8_;
|
||||
float *dst = dst_ptr + i * params_->deep_ * params_->col_align_;
|
||||
#ifdef ENABLE_ARM32
|
||||
if (params_->b_transpose_) {
|
||||
RowMajor2Col4Major(src, dst, params_->col_, params_->deep_);
|
||||
} else {
|
||||
RowMajor2Row4Major(src, dst, params_->deep_, params_->col_);
|
||||
}
|
||||
#else
|
||||
if (params_->b_transpose_) {
|
||||
RowMajor2Col8Major(src, dst, params_->col_, params_->deep_);
|
||||
} else {
|
||||
RowMajor2Row8Major(src, dst, params_->deep_, params_->col_);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,24 +16,6 @@
|
|||
|
||||
#include "wrapper/int8/convolution_int8_wrapper.h"
|
||||
|
||||
void CheckSupportOptimize(const ConvolutionInt8Args *args) {
|
||||
int tile_num = 8;
|
||||
#ifdef ENABLE_ARM32
|
||||
tile_num = 4;
|
||||
args->is_optimize_ = false;
|
||||
#endif
|
||||
#ifdef ENABLE_ARM64
|
||||
if (mindspore::lite::IsSupportSDot()) {
|
||||
matmul_func_ = MatMulRInt8_optimize_handler;
|
||||
args->is_optimize_ = true;
|
||||
} else {
|
||||
tile_num = 4;
|
||||
args->is_optimize_ = false;
|
||||
}
|
||||
#endif
|
||||
args->conv_param_->tile_num_ = tile_num;
|
||||
}
|
||||
|
||||
int ConvolutionInt8Run(void *cdata, int task_id) {
|
||||
ConvolutionInt8Args *args = (ConvolutionInt8Args *)cdata;
|
||||
ConvInt8(args->input_data_, args->packed_input_, args->matmul_input_, args->packed_weight_, args->bias_data_,
|
||||
|
|
|
@ -36,8 +36,6 @@ typedef struct {
|
|||
bool is_optimize_;
|
||||
} ConvolutionInt8Args;
|
||||
|
||||
void CheckSupportOptimize(const ConvolutionInt8Args *args);
|
||||
|
||||
int ConvolutionInt8Run(void *cdata, int task_id);
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_WRAPPER_INT8_WRAPPER_H_
|
||||
|
|
Loading…
Reference in New Issue