!29807 [MS][LITE]fix linux arm64 run

Merge pull request !29807 from gongdaguo/fix_arm64_build_master
This commit is contained in:
i-robot 2022-02-09 08:45:08 +00:00 committed by Gitee
commit d3f68ec8b2
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 17 additions and 11 deletions

View File

@ -113,7 +113,7 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu
zp1 = quant_arg->in_quant_args_[1].zp_;
zp2 = quant_arg->in_quant_args_[0].zp_;
}
#ifdef ENABLE_ARM
#ifdef ENABLE_NENO
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
@ -127,7 +127,7 @@ void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *outpu
#endif
for (int index = 0; index < real_dst_count; ++index) {
int j = 0;
#ifdef ENABLE_ARM
#ifdef ENABLE_NENO
for (; j <= depth - 16; j += 16) {
int8x16_t input0_vec = vld1q_s8(input0_data + j);
int8x16_t input1_vec = vld1q_s8(input1_data);

View File

@ -30,8 +30,10 @@ if(SUPPORT_TRAIN)
endif()
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
if(NOT PLATFORM_ARM32 AND NOT TARGET_HIMIX AND NOT MACHINE_LINUX_ARM64)
if(MACHINE_LINUX_ARM64)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16")
elseif(NOT PLATFORM_ARM32 AND NOT TARGET_HIMIX)
list(APPEND SDOT_FILES ${SDOT_SRC})
add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES})
add_dependencies(nnacl_optimize_mid fbs_src)

View File

@ -45,8 +45,8 @@ option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off)
option(ENABLE_VERBOSE "" off)
option(ENABLE_MODEL_OBF "if support model obfuscation" off)
if(MACHINE_LINUX_ARM64)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+fp16")
endif()
if(DEFINED ENV{MSLITE_GPU_BACKEND})
@ -150,6 +150,10 @@ endif()
# Linux ARM64 builds get two directory-scoped compile definitions:
# MACHINE_LINUX_ARM64 and LINUX_RUNTIME.
# LINUX_RUNTIME is the macro the lite session source tests before including
# <malloc.h> and calling malloc_trim(0).
if(MACHINE_LINUX_ARM64)
add_compile_definitions(MACHINE_LINUX_ARM64)
add_compile_definitions(LINUX_RUNTIME)
endif()
# x86_64 builds also define LINUX_RUNTIME, so the same malloc_trim(0) path is
# compiled in there too.
# NOTE(review): malloc_trim is a glibc extension; presumably PLATFORM_X86_64 is
# only set for Linux builds here - confirm it is not also set for Windows or
# macOS x86_64 builds.
if(PLATFORM_X86_64)
add_compile_definitions(LINUX_RUNTIME)
endif()
if(TOOLCHAIN_NAME STREQUAL "himix200")
set(TARGET_HIMIX on)

View File

@ -180,10 +180,10 @@ bool CpuInfo::ArmIsSupportFp16() {
#elif defined(ENABLE_ARM64) && defined(MACHINE_LINUX_ARM64)
const uint32_t hwcap = getauxval(AT_HWCAP);
if (hwcap & HWCAP_FPHP) {
MS_LOG(ERROR) << "Hw cap support FP16, hwcap: 0x" << hwcap;
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
fp16_flag_ = true;
} else {
MS_LOG(ERROR) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
}
#endif
return fp16_flag_;

View File

@ -22,7 +22,7 @@
#ifndef RUNTIME_PASS_CLIP
#include "src/runtime/runtime_pass.h"
#endif
#if defined(MACHINE_LINUX_ARM64)
#if defined(LINUX_RUNTIME)
#include <malloc.h>
#endif
#include <vector>
@ -718,7 +718,7 @@ int LiteSession::CompileGraph(Model *model) {
}
is_running_.store(false);
#if defined(MACHINE_LINUX_ARM64)
#if defined(LINUX_RUNTIME)
(void)malloc_trim(0);
#endif
return RET_OK;
@ -1314,7 +1314,7 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
#endif
is_running_.store(false);
#if defined(MACHINE_LINUX_ARM64)
#if defined(LINUX_RUNTIME)
(void)malloc_trim(0);
#endif
return RET_OK;